{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "statewide-blank",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2021-04-20 19:45:12,969 INFO submit.py[180] request: http://localhost:8000/v1/evaluation/statistics/ with data {'job_id': 10178, 'type': 'csv', 'method': 'basic', 'with_header': True, 'from_sampling': True}\n"
     ]
    }
   ],
   "source": [
    "from sparksampling import Submitter\n",
    "from sparksampling.utilities.var import FILE_TYPE_CSV\n",
    "from sparksampling.utilities.var import SIMPLE_RANDOM_SAMPLING_METHOD\n",
    "# Client for the sampling service (the logged request goes to http://localhost:8000).\n",
    "submitter = Submitter()\n",
    "\n",
    "# Id of the job whose result is summarised below.\n",
    "job_id = 10178\n",
    "\n",
    "# Request basic statistics for that job's output, read as a CSV with a header row.\n",
    "# NOTE(review): from_sampling=True presumably makes the service resolve the dataset\n",
    "# from job_id instead of an explicit path - confirm against the Submitter API.\n",
    "data = submitter.get_statistics(\n",
    "    job_id=job_id,\n",
    "    from_sampling=True,\n",
    "    file_type=FILE_TYPE_CSV,\n",
    "    with_header=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "important-pulse",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>summary</th>\n",
       "      <th># id</th>\n",
       "      <th>X_0</th>\n",
       "      <th>X_1</th>\n",
       "      <th>X_2</th>\n",
       "      <th>X_3</th>\n",
       "      <th>X_4</th>\n",
       "      <th>X_5</th>\n",
       "      <th>X_6</th>\n",
       "      <th>X_7</th>\n",
       "      <th>...</th>\n",
       "      <th>X_91</th>\n",
       "      <th>X_92</th>\n",
       "      <th>X_93</th>\n",
       "      <th>X_94</th>\n",
       "      <th>X_95</th>\n",
       "      <th>X_96</th>\n",
       "      <th>X_97</th>\n",
       "      <th>X_98</th>\n",
       "      <th>X_99</th>\n",
       "      <th>y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>count</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>...</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>mean</td>\n",
       "      <td>464.99</td>\n",
       "      <td>1.96</td>\n",
       "      <td>2.93</td>\n",
       "      <td>2.99</td>\n",
       "      <td>2.83</td>\n",
       "      <td>4.03</td>\n",
       "      <td>4.34</td>\n",
       "      <td>3.21</td>\n",
       "      <td>2.68</td>\n",
       "      <td>...</td>\n",
       "      <td>0.02870580720599997</td>\n",
       "      <td>-0.16376946496830003</td>\n",
       "      <td>-0.019567302474000008</td>\n",
       "      <td>-0.027822220310000007</td>\n",
       "      <td>0.152076216818</td>\n",
       "      <td>0.017592778044000015</td>\n",
       "      <td>-0.05921129650999999</td>\n",
       "      <td>0.03183527405300001</td>\n",
       "      <td>-0.13985298216999997</td>\n",
       "      <td>0.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>stddev</td>\n",
       "      <td>283.8308464454478</td>\n",
       "      <td>1.8086213288334045</td>\n",
       "      <td>2.3495325131402267</td>\n",
       "      <td>1.9974478666032762</td>\n",
       "      <td>1.9125053640909788</td>\n",
       "      <td>1.572844043075595</td>\n",
       "      <td>1.3575081151979471</td>\n",
       "      <td>1.6593475655914722</td>\n",
       "      <td>1.8688299153548598</td>\n",
       "      <td>...</td>\n",
       "      <td>1.348051318321073</td>\n",
       "      <td>0.9309219608424282</td>\n",
       "      <td>1.4072218698992587</td>\n",
       "      <td>0.8872036139044543</td>\n",
       "      <td>0.9261388529896935</td>\n",
       "      <td>1.2685148427186093</td>\n",
       "      <td>1.4700254701980153</td>\n",
       "      <td>0.8536691405900096</td>\n",
       "      <td>1.2155347613856309</td>\n",
       "      <td>0.5024183937956913</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>min</td>\n",
       "      <td>101</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.14131553</td>\n",
       "      <td>-0.00099003483</td>\n",
       "      <td>-0.032338354</td>\n",
       "      <td>-0.011785354</td>\n",
       "      <td>-0.0049381842</td>\n",
       "      <td>-0.0030899136</td>\n",
       "      <td>-0.0088573574</td>\n",
       "      <td>-0.0020981927</td>\n",
       "      <td>-0.013695239</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>max</td>\n",
       "      <td>997</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>...</td>\n",
       "      <td>2.8732301</td>\n",
       "      <td>2.2820269</td>\n",
       "      <td>2.773148</td>\n",
       "      <td>1.7608078999999999</td>\n",
       "      <td>2.1026392999999994</td>\n",
       "      <td>2.4105882999999997</td>\n",
       "      <td>4.2090319</td>\n",
       "      <td>1.9963511000000003</td>\n",
       "      <td>3.1549591</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 103 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "  summary               # id                 X_0                 X_1  \\\n",
       "0   count                100                 100                 100   \n",
       "1    mean             464.99                1.96                2.93   \n",
       "2  stddev  283.8308464454478  1.8086213288334045  2.3495325131402267   \n",
       "3     min                101                   0                   0   \n",
       "4     max                997                   6                   6   \n",
       "\n",
       "                  X_2                 X_3                X_4  \\\n",
       "0                 100                 100                100   \n",
       "1                2.99                2.83               4.03   \n",
       "2  1.9974478666032762  1.9125053640909788  1.572844043075595   \n",
       "3                   0                   0                  1   \n",
       "4                   6                   6                  6   \n",
       "\n",
       "                  X_5                 X_6                 X_7  ...  \\\n",
       "0                 100                 100                 100  ...   \n",
       "1                4.34                3.21                2.68  ...   \n",
       "2  1.3575081151979471  1.6593475655914722  1.8688299153548598  ...   \n",
       "3                   2                   1                   0  ...   \n",
       "4                   6                   6                   6  ...   \n",
       "\n",
       "                  X_91                  X_92                   X_93  \\\n",
       "0                  100                   100                    100   \n",
       "1  0.02870580720599997  -0.16376946496830003  -0.019567302474000008   \n",
       "2    1.348051318321073    0.9309219608424282     1.4072218698992587   \n",
       "3          -0.14131553        -0.00099003483           -0.032338354   \n",
       "4            2.8732301             2.2820269               2.773148   \n",
       "\n",
       "                    X_94                X_95                  X_96  \\\n",
       "0                    100                 100                   100   \n",
       "1  -0.027822220310000007      0.152076216818  0.017592778044000015   \n",
       "2     0.8872036139044543  0.9261388529896935    1.2685148427186093   \n",
       "3           -0.011785354       -0.0049381842         -0.0030899136   \n",
       "4     1.7608078999999999  2.1026392999999994    2.4105882999999997   \n",
       "\n",
       "                   X_97                 X_98                  X_99  \\\n",
       "0                   100                  100                   100   \n",
       "1  -0.05921129650999999  0.03183527405300001  -0.13985298216999997   \n",
       "2    1.4700254701980153   0.8536691405900096    1.2155347613856309   \n",
       "3         -0.0088573574        -0.0020981927          -0.013695239   \n",
       "4             4.2090319   1.9963511000000003             3.1549591   \n",
       "\n",
       "                    y  \n",
       "0                 100  \n",
       "1                0.49  \n",
       "2  0.5024183937956913  \n",
       "3                   0  \n",
       "4                   1  \n",
       "\n",
       "[5 rows x 103 columns]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Convert the statistics held in `data` to a pandas DataFrame; as the last\n",
    "# expression of the cell it is rendered as a table.\n",
    "data.to_pandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "ecological-martin",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2021-04-20 20:14:26,100 INFO submit.py[180] request: http://localhost:8000/v1/evaluation/statistics/ with data {'path': 'hdfs://localhost:9000/dataset/ten_million_top1k.csv', 'type': 'csv', 'method': 'basic', 'with_header': True, 'from_sampling': False}\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>summary</th>\n",
       "      <th># id</th>\n",
       "      <th>X_0</th>\n",
       "      <th>X_1</th>\n",
       "      <th>X_2</th>\n",
       "      <th>X_3</th>\n",
       "      <th>X_4</th>\n",
       "      <th>X_5</th>\n",
       "      <th>X_6</th>\n",
       "      <th>X_7</th>\n",
       "      <th>...</th>\n",
       "      <th>X_91</th>\n",
       "      <th>X_92</th>\n",
       "      <th>X_93</th>\n",
       "      <th>X_94</th>\n",
       "      <th>X_95</th>\n",
       "      <th>X_96</th>\n",
       "      <th>X_97</th>\n",
       "      <th>X_98</th>\n",
       "      <th>X_99</th>\n",
       "      <th>y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>count</td>\n",
       "      <td>1000</td>\n",
       "      <td>1000</td>\n",
       "      <td>1000</td>\n",
       "      <td>1000</td>\n",
       "      <td>1000</td>\n",
       "      <td>1000</td>\n",
       "      <td>1000</td>\n",
       "      <td>1000</td>\n",
       "      <td>1000</td>\n",
       "      <td>...</td>\n",
       "      <td>1000</td>\n",
       "      <td>1000</td>\n",
       "      <td>1000</td>\n",
       "      <td>1000</td>\n",
       "      <td>1000</td>\n",
       "      <td>1000</td>\n",
       "      <td>1000</td>\n",
       "      <td>1000</td>\n",
       "      <td>1000</td>\n",
       "      <td>1000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>mean</td>\n",
       "      <td>499.5</td>\n",
       "      <td>2.049</td>\n",
       "      <td>3.051</td>\n",
       "      <td>2.815</td>\n",
       "      <td>3.118</td>\n",
       "      <td>4.211</td>\n",
       "      <td>3.992</td>\n",
       "      <td>3.351</td>\n",
       "      <td>2.658</td>\n",
       "      <td>...</td>\n",
       "      <td>0.029485678112799993</td>\n",
       "      <td>-0.015578821739792</td>\n",
       "      <td>-0.034382649559500045</td>\n",
       "      <td>0.03410696849619999</td>\n",
       "      <td>0.004251966935300004</td>\n",
       "      <td>0.031006250621699986</td>\n",
       "      <td>-0.008798382997400003</td>\n",
       "      <td>0.009004523887710001</td>\n",
       "      <td>-0.05045604444349996</td>\n",
       "      <td>0.483</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>stddev</td>\n",
       "      <td>288.8194360957494</td>\n",
       "      <td>1.8889592815005751</td>\n",
       "      <td>2.394186945735675</td>\n",
       "      <td>2.0559674126799785</td>\n",
       "      <td>1.863208938242715</td>\n",
       "      <td>1.6208350027396687</td>\n",
       "      <td>1.5317579058984108</td>\n",
       "      <td>1.7728344372619638</td>\n",
       "      <td>1.9335911601925522</td>\n",
       "      <td>...</td>\n",
       "      <td>1.310014162316104</td>\n",
       "      <td>0.9928149233047021</td>\n",
       "      <td>1.3489092513130678</td>\n",
       "      <td>1.0175306226012728</td>\n",
       "      <td>0.9937576399485477</td>\n",
       "      <td>1.2158726897277283</td>\n",
       "      <td>1.4720581081411457</td>\n",
       "      <td>0.8531823148864874</td>\n",
       "      <td>1.0285753546436174</td>\n",
       "      <td>0.4999609594367951</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>min</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.0021861139000000003</td>\n",
       "      <td>-0.00099003483</td>\n",
       "      <td>-0.007215624499999999</td>\n",
       "      <td>-0.0087255505</td>\n",
       "      <td>-0.0049381842</td>\n",
       "      <td>-0.0030899136</td>\n",
       "      <td>-0.0012198847</td>\n",
       "      <td>-0.0017179298999999999</td>\n",
       "      <td>-0.00151381</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>max</td>\n",
       "      <td>999</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>...</td>\n",
       "      <td>4.247343099999999</td>\n",
       "      <td>3.266212</td>\n",
       "      <td>3.7208654</td>\n",
       "      <td>3.1033252</td>\n",
       "      <td>3.1775257999999997</td>\n",
       "      <td>3.5107522000000007</td>\n",
       "      <td>5.6920133</td>\n",
       "      <td>3.0476226000000004</td>\n",
       "      <td>3.1554324</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 103 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "  summary               # id                 X_0                X_1  \\\n",
       "0   count               1000                1000               1000   \n",
       "1    mean              499.5               2.049              3.051   \n",
       "2  stddev  288.8194360957494  1.8889592815005751  2.394186945735675   \n",
       "3     min                  0                   0                  0   \n",
       "4     max                999                   6                  6   \n",
       "\n",
       "                  X_2                X_3                 X_4  \\\n",
       "0                1000               1000                1000   \n",
       "1               2.815              3.118               4.211   \n",
       "2  2.0559674126799785  1.863208938242715  1.6208350027396687   \n",
       "3                   0                  0                   1   \n",
       "4                   6                  6                   6   \n",
       "\n",
       "                  X_5                 X_6                 X_7  ...  \\\n",
       "0                1000                1000                1000  ...   \n",
       "1               3.992               3.351               2.658  ...   \n",
       "2  1.5317579058984108  1.7728344372619638  1.9335911601925522  ...   \n",
       "3                   2                   1                   0  ...   \n",
       "4                   6                   6                   6  ...   \n",
       "\n",
       "                     X_91                X_92                   X_93  \\\n",
       "0                    1000                1000                   1000   \n",
       "1    0.029485678112799993  -0.015578821739792  -0.034382649559500045   \n",
       "2       1.310014162316104  0.9928149233047021     1.3489092513130678   \n",
       "3  -0.0021861139000000003      -0.00099003483  -0.007215624499999999   \n",
       "4       4.247343099999999            3.266212              3.7208654   \n",
       "\n",
       "                  X_94                  X_95                  X_96  \\\n",
       "0                 1000                  1000                  1000   \n",
       "1  0.03410696849619999  0.004251966935300004  0.031006250621699986   \n",
       "2   1.0175306226012728    0.9937576399485477    1.2158726897277283   \n",
       "3        -0.0087255505         -0.0049381842         -0.0030899136   \n",
       "4            3.1033252    3.1775257999999997    3.5107522000000007   \n",
       "\n",
       "                    X_97                    X_98                  X_99  \\\n",
       "0                   1000                    1000                  1000   \n",
       "1  -0.008798382997400003    0.009004523887710001  -0.05045604444349996   \n",
       "2     1.4720581081411457      0.8531823148864874    1.0285753546436174   \n",
       "3          -0.0012198847  -0.0017179298999999999           -0.00151381   \n",
       "4              5.6920133      3.0476226000000004             3.1554324   \n",
       "\n",
       "                    y  \n",
       "0                1000  \n",
       "1               0.483  \n",
       "2  0.4999609594367951  \n",
       "3                   0  \n",
       "4                   1  \n",
       "\n",
       "[5 rows x 103 columns]"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Statistics of the source CSV itself, addressed by its HDFS path rather than by a job id.\n",
    "submitter.get_statistics(\n",
    "    path='hdfs://localhost:9000/dataset/ten_million_top1k.csv',\n",
    "    from_sampling=False,\n",
    "    file_type=FILE_TYPE_CSV,\n",
    "    with_header=True,\n",
    ").to_pandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "purple-graphics",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2021-04-20 20:24:25,986 INFO submit.py[180] request: http://localhost:8000/v1/query/evaluation/job/ with data {'job_id': 50072}\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'code': 0,\n",
       " 'msg': '',\n",
       " 'data': {'job_id': 50072,\n",
       "  'job_status': 'Succeed',\n",
       "  'msg': 'succeed',\n",
       "  'method': 'Compare Evaluation',\n",
       "  'start_time': '2021/04/20/ 20:24:02',\n",
       "  'end_time': '2021/04/20 20:24:08',\n",
       "  'result': \"{'# id': {'count': '904', 'mean': '502.4845132743363', 'stddev': '297.45554733302066', 'min': '1', 'max': '998', 'mean_bias': 0.005975001550222782, 'stddev_bias': 0.02990141991139487, 'score': 99.10308946345955}, 'X_0': {'count': '904', 'mean': '2.043141592920354', 'stddev': '1.8639547253150965', 'min': '0', 'max': '6', 'mean_bias': 0, 'stddev_bias': 0.013237212908906726, 'score': 99.66906967727734}, 'X_1': {'count': '904', 'mean': '3.004424778761062', 'stddev': '2.3479173167596703', 'min': '0', 'max': '6', 'mean_bias': 0.015265559239245522, 'stddev_bias': 0.019325821259871253, 'score': 99.13521548752209}, 'X_2': {'count': '904', 'mean': '2.799778761061947', 'stddev': '2.0559073936446555', 'min': '0', 'max': '6', 'mean_bias': 0.005407189676040081, 'stddev_bias': 2.9192600501779837e-05, 'score': 99.86409044308645}, 'X_3': {'count': '904', 'mean': '3.0519911504424777', 'stddev': '1.880881374867243', 'min': '0', 'max': '6', 'mean_bias': 0.021170253225632513, 'stddev_bias': 0.009484946246122967, 'score': 99.23362001320612}, 'X_4': {'count': '904', 'mean': '4.175884955752212', 'stddev': '1.5739554858948572', 'min': '1', 'max': '6', 'mean_bias': 0.008338884884300138, 'stddev_bias': 0.02892306543576113, 'score': 99.06845124199847}, 'X_5': {'count': '904', 'mean': '4.028761061946903', 'stddev': '1.5398912002201217', 'min': '2', 'max': '6', 'mean_bias': 0.009208682852430608, 'stddev_bias': 0.005309777929261339, 'score': 99.63703848045769}, 'X_6': {'count': '904', 'mean': '3.2776548672566372', 'stddev': '1.7613132585074758', 'min': '1', 'max': '6', 'mean_bias': 0.021887535882829823, 'stddev_bias': 0.006498733616818591, 'score': 99.2903432625088}, 'X_7': {'count': '904', 'mean': '2.768805309734513', 'stddev': '1.9188060119078996', 'min': '0', 'max': '6', 'mean_bias': 0.04168747544564078, 'stddev_bias': 0.0076464707685053035, 'score': 98.76665134464635}, 'X_8': {'count': '904', 'mean': '2.7123893805309733', 'stddev': '1.9657142162014198', 'min': '0', 
'max': '6', 'mean_bias': 0.026770943476507564, 'stddev_bias': 0.019792323330999194, 'score': 98.83591832981233}, 'X_9': {'count': '904', 'mean': '2.566371681415929', 'stddev': '1.982765638748184', 'min': '0', 'max': '6', 'mean_bias': 0.014071578403407992, 'stddev_bias': 0.009285241575772261, 'score': 99.41607950052051}, 'X_10': {'count': '904', 'mean': '2.4269911504424777', 'stddev': '1.8072027098455483', 'min': '0', 'max': '5', 'mean_bias': 0.03881538596337513, 'stddev_bias': 0.01886266452807886, 'score': 98.55804873771365}, 'X_11': {'count': '904', 'mean': '3.140486725663717', 'stddev': '1.7769155233809604', 'min': '1', 'max': '6', 'mean_bias': 0.005921436791709448, 'stddev_bias': 0.010818196153315108, 'score': 99.5815091763744}, 'X_12': {'count': '904', 'mean': '3.1128318584070795', 'stddev': '2.112686760816003', 'min': '0', 'max': '6', 'mean_bias': 0.009676243401582666, 'stddev_bias': 0.01783205331073929, 'score': 99.31229258219196}, 'X_13': {'count': '904', 'mean': '2.1626106194690267', 'stddev': '1.884569339002512', 'min': '0', 'max': '6', 'mean_bias': 0.023211102317512784, 'stddev_bias': 0.01891828366898125, 'score': 98.94676535033764}, 'X_14': {'count': '904', 'mean': '2.35287610619469', 'stddev': '1.6099195973003593', 'min': '0', 'max': '5', 'mean_bias': 0.01758826463687261, 'stddev_bias': 0.01830959060385617, 'score': 99.10255361898179}, 'X_15': {'count': '904', 'mean': '-0.034070674752654816', 'stddev': '0.9821741795570659', 'min': '-0.018425233', 'max': '2.5914002', 'mean_bias': 1, 'stddev_bias': 0.033560490402175305, 'score': 74.16098773994561}, 'X_16': {'count': '904', 'mean': '-0.03509833923761053', 'stddev': '1.4849229085451956', 'min': '-0.0012198847', 'max': '5.6920133', 'mean_bias': 1, 'stddev_bias': 0.008739329196926236, 'score': 74.78151677007683}, 'X_17': {'count': '904', 'mean': '0.06499539622367251', 'stddev': '1.2105697696260267', 'min': '-0.0030899136', 'max': '3.0497526', 'mean_bias': 1, 'stddev_bias': 0.00436141065302579, 'score': 
74.89096473367437}, 'X_18': {'count': '904', 'mean': '0.01766909315497786', 'stddev': '0.8029150351036136', 'min': '-0.0032088744', 'max': '3.0907927', 'mean_bias': 0, 'stddev_bias': 0.013932070079433652, 'score': 99.65169824801416}, 'X_19': {'count': '904', 'mean': '0.025927504823340723', 'stddev': '0.9805679847277169', 'min': '-0.010757286999999999', 'max': '3.2561489', 'mean_bias': 0, 'stddev_bias': 0.025016416971533128, 'score': 99.37458957571167}, 'X_20': {'count': '904', 'mean': '0.0053688892176992084', 'stddev': '1.394466931688634', 'min': '-0.020871571999999998', 'max': '5.016143700000001', 'mean_bias': 0, 'stddev_bias': 0.0032892510398994735, 'score': 99.91776872400251}, 'X_21': {'count': '904', 'mean': '-0.07611311426935831', 'stddev': '1.5848725166122475', 'min': '-0.0010833961', 'max': '5.2880231', 'mean_bias': 0.7188520150815199, 'stddev_bias': 0.0227467463615556, 'score': 81.46003096392312}, 'X_22': {'count': '904', 'mean': '0.08364952558871681', 'stddev': '1.0334558059881225', 'min': '-0.0034135259000000005', 'max': '3.2570428', 'mean_bias': 1, 'stddev_bias': 0.029749449577952492, 'score': 74.25626376055119}, 'X_23': {'count': '904', 'mean': '-0.01494350367909287', 'stddev': '0.9703355983624548', 'min': '-0.0017714715', 'max': '3.0996507999999996', 'mean_bias': 0, 'stddev_bias': 0.025719874913370633, 'score': 99.35700312716574}, 'X_24': {'count': '904', 'mean': '0.028701548790376088', 'stddev': '0.963624797645245', 'min': '-0.006681495500000001', 'max': '2.7941697', 'mean_bias': 0.3809158710533335, 'stddev_bias': 0.026432355220945253, 'score': 89.81629434314303}, 'X_25': {'count': '904', 'mean': '0.0296358352472345', 'stddev': '0.8237281702531541', 'min': '-0.0016616460000000001', 'max': '3.0585120999999997', 'mean_bias': 0.5285781423631845, 'stddev_bias': 0.020728958970796466, 'score': 86.26732246665047}, 'X_26': {'count': '904', 'mean': '-0.08038358620508851', 'stddev': '0.9716518045838415', 'min': '-0.01529318', 'max': '3.0391082999999997', 
'mean_bias': 1, 'stddev_bias': 0.010232962044854414, 'score': 74.74417594887865}, 'X_27': {'count': '904', 'mean': '0.04707043131245577', 'stddev': '1.0185557534437053', 'min': '-0.002217752', 'max': '3.7472445', 'mean_bias': 0, 'stddev_bias': 0.019180113889822593, 'score': 99.52049715275444}, 'X_28': {'count': '904', 'mean': '0.04030136749845136', 'stddev': '1.1659211363745208', 'min': '-0.0075207629', 'max': '4.3657891', 'mean_bias': 0.5056827590526853, 'stddev_bias': 0.02017891805915148, 'score': 86.85345807220408}, 'X_29': {'count': '904', 'mean': '0.02170623706232301', 'stddev': '0.3943368250124789', 'min': '-0.00033808202000000005', 'max': '0.9071779000000001', 'mean_bias': 1, 'stddev_bias': 0.008185711073506212, 'score': 74.79535722316234}, 'X_30': {'count': '904', 'mean': '-0.043781253054070844', 'stddev': '1.0632638241683061', 'min': '-0.00098797259', 'max': '3.6163004', 'mean_bias': 1, 'stddev_bias': 0.032855666379594535, 'score': 74.17860834051014}, 'X_31': {'count': '904', 'mean': '-0.05944298595183628', 'stddev': '1.088700854641638', 'min': '-0.00074937357', 'max': '3.6517383000000003', 'mean_bias': 0.870658931715651, 'stddev_bias': 0.018064887048303616, 'score': 77.78190453090113}, 'X_32': {'count': '904', 'mean': '0.01766909315497786', 'stddev': '0.8029150351036136', 'min': '-0.0032088744', 'max': '3.0907927', 'mean_bias': 0, 'stddev_bias': 0.013932070079433652, 'score': 99.65169824801416}, 'X_33': {'count': '904', 'mean': '0.006551921346603997', 'stddev': '1.0246485009554973', 'min': '-0.0026603396', 'max': '2.6569436', 'mean_bias': 0.7711858038964924, 'stddev_bias': 0.026016040078295825, 'score': 80.0699539006303}, 'X_34': {'count': '904', 'mean': '0.04245015497454636', 'stddev': '1.0006100894077643', 'min': '-0.016457403', 'max': '2.613791', 'mean_bias': 0.7325407227681574, 'stddev_bias': 0.002954027157587598, 'score': 81.61263125185637}, 'X_35': {'count': '904', 'mean': '-0.009155284208296475', 'stddev': '1.0184858363737443', 'min': 
'-0.0016161931', 'max': '2.7035566', 'mean_bias': 1, 'stddev_bias': 0.0307911139816813, 'score': 74.23022215045798}, 'X_36': {'count': '904', 'mean': '-0.04166934463816366', 'stddev': '1.2942237320463832', 'min': '-0.0045589377000000006', 'max': '4.8623764000000005', 'mean_bias': 1, 'stddev_bias': 0.009618005420498711, 'score': 74.75954986448752}, 'X_37': {'count': '904', 'mean': '-0.045639820784557514', 'stddev': '1.0972453217029485', 'min': '-0.00073222587', 'max': '3.4580937999999994', 'mean_bias': 0.6162271168291866, 'stddev_bias': 0.022407770131937572, 'score': 84.0341278259719}, 'X_38': {'count': '904', 'mean': '0.012349802338384936', 'stddev': '0.9552504925572244', 'min': '-0.0028899015000000004', 'max': '2.8222251', 'mean_bias': 0, 'stddev_bias': 0.03397459078297378, 'score': 99.15063523042565}, 'X_39': {'count': '904', 'mean': '-0.07209106821515489', 'stddev': '1.3430658770066557', 'min': '-0.007215624499999999', 'max': '3.7208654', 'mean_bias': 1, 'stddev_bias': 0.0043319254432601885, 'score': 74.89170186391848}, 'X_40': {'count': '904', 'mean': '0.04192463421548679', 'stddev': '0.9589937192938867', 'min': '-0.010397587', 'max': '2.7344937000000002', 'mean_bias': 1, 'stddev_bias': 0.0716021109379158, 'score': 73.2099472265521}, 'X_41': {'count': '904', 'mean': '-0.07611311426935831', 'stddev': '1.5848725166122475', 'min': '-0.0010833961', 'max': '5.2880231', 'mean_bias': 0.7188520150815199, 'stddev_bias': 0.0227467463615556, 'score': 81.46003096392312}, 'X_42': {'count': '904', 'mean': '-0.055106964803761045', 'stddev': '1.0534229992909745', 'min': '-0.0091515934', 'max': '2.7959132999999996', 'mean_bias': 1, 'stddev_bias': 0.0017590619444204994, 'score': 74.95602345138948}, 'X_43': {'count': '904', 'mean': '-0.03873307733882739', 'stddev': '0.9898631429304386', 'min': '-0.002323861', 'max': '3.1930612999999997', 'mean_bias': 0, 'stddev_bias': 0.005415080946055075, 'score': 99.86462297634863}, 'X_44': {'count': '904', 'mean': '0.025813362764823076', 
'stddev': '0.576439345792362', 'min': '-0.004300643900000001', 'max': '1.7105518', 'mean_bias': 1, 'stddev_bias': 0.00596964928804255, 'score': 74.85075876779894}, 'X_45': {'count': '904', 'mean': '0.06794211359524324', 'stddev': '0.9723472893621423', 'min': '-0.0056203830000000005', 'max': '3.1052197', 'mean_bias': 1, 'stddev_bias': 0.008499810484815741, 'score': 74.7875047378796}, 'X_46': {'count': '904', 'mean': '0.12235587408749994', 'stddev': '1.0059366354079522', 'min': '-0.0060434911', 'max': '3.6842106', 'mean_bias': 0.5757617797311378, 'stddev_bias': 0.03063943721686918, 'score': 84.83996957629982}, 'X_47': {'count': '904', 'mean': '-0.03948868115586285', 'stddev': '1.295435741956866', 'min': '-0.0046407803', 'max': '3.515325', 'mean_bias': 0.4357264608934512, 'stddev_bias': 0.018375686935660617, 'score': 88.64744630427221}, 'X_48': {'count': '904', 'mean': '0.01766909315497786', 'stddev': '0.8029150351036136', 'min': '-0.0032088744', 'max': '3.0907927', 'mean_bias': 0, 'stddev_bias': 0.013932070079433652, 'score': 99.65169824801416}, 'X_49': {'count': '904', 'mean': '-0.06690525996150437', 'stddev': '1.0006720635517958', 'min': '-0.011692343000000001', 'max': '3.0537411', 'mean_bias': 1, 'stddev_bias': 0.02710418094339113, 'score': 74.32239547641522}, 'X_50': {'count': '904', 'mean': '2.584070796460177', 'stddev': '1.6396291816238784', 'min': '0', 'max': '5', 'mean_bias': 0.00689054709447458, 'stddev_bias': 0.0029471460186951946, 'score': 99.75405767217076}, 'X_51': {'count': '904', 'mean': '2.077433628318584', 'stddev': '1.8027233139453136', 'min': '0', 'max': '5', 'mean_bias': 0.007973618786309544, 'stddev_bias': 0.011162311162719553, 'score': 99.52160175127428}, 'X_52': {'count': '904', 'mean': '2.2986725663716814', 'stddev': '2.136369242548914', 'min': '0', 'max': '6', 'mean_bias': 0, 'stddev_bias': 0.0283961187425975, 'score': 99.29009703143507}, 'X_53': {'count': '904', 'mean': '2.643805309734513', 'stddev': '1.9504678456672258', 'min': '0', 'max': 
'6', 'mean_bias': 0.010242762603940817, 'stddev_bias': 0.009366806896539718, 'score': 99.50976076248799}, 'X_54': {'count': '904', 'mean': '2.1825221238938055', 'stddev': '1.4751105405798257', 'min': '0', 'max': '5', 'mean_bias': 0.03143767669839575, 'stddev_bias': 0.02413080783247031, 'score': 98.61078788672835}, 'X_55': {'count': '904', 'mean': '2.0807522123893807', 'stddev': '1.196666229480531', 'min': '0', 'max': '4', 'mean_bias': 0.03109624003438102, 'stddev_bias': 0.022657366011612865, 'score': 98.65615984885015}, 'X_56': {'count': '904', 'mean': '2.6106194690265485', 'stddev': '1.534735523259186', 'min': '1', 'max': '6', 'mean_bias': 0.035247794151312434, 'stddev_bias': 0.01788989032064233, 'score': 98.67155788820114}, 'X_57': {'count': '904', 'mean': '3.1106194690265485', 'stddev': '2.0970205233645984', 'min': '0', 'max': '6', 'mean_bias': 0.03126145467874536, 'stddev_bias': 0.001746051162938211, 'score': 99.1748123539579}, 'X_58': {'count': '904', 'mean': '2.8827433628318584', 'stddev': '2.2598549518963336', 'min': '0', 'max': '6', 'mean_bias': 0.024122084349404802, 'stddev_bias': 0.011561342265668522, 'score': 99.10791433462316}, 'X_59': {'count': '904', 'mean': '2.5309734513274336', 'stddev': '1.660275478937473', 'min': '0', 'max': '5', 'mean_bias': 0, 'stddev_bias': 0.00410817078402563, 'score': 99.89729573039936}, 'X_60': {'count': '904', 'mean': '2.980088495575221', 'stddev': '1.2369547829314547', 'min': '1', 'max': '5', 'mean_bias': 0.017769118136050976, 'stddev_bias': 0.013239170724015141, 'score': 99.22479277849835}, 'X_61': {'count': '904', 'mean': '3.663716814159292', 'stddev': '1.370375505887595', 'min': '2', 'max': '6', 'mean_bias': 0.01375672776958829, 'stddev_bias': 0.011674084868441018, 'score': 99.36422968404926}, 'X_62': {'count': '904', 'mean': '3.1515486725663715', 'stddev': '2.0616838347855797', 'min': '0', 'max': '6', 'mean_bias': 0.006562974310562669, 'stddev_bias': 0.009609827006357066, 'score': 99.59567996707702}, 'X_63': {'count': 
'904', 'mean': '2.1526548672566372', 'stddev': '1.9497944430711125', 'min': '0', 'max': '6', 'mean_bias': 0.03684345984043071, 'stddev_bias': 0.016922560426133347, 'score': 98.65584949333591}, 'X_64': {'count': '904', 'mean': '2.7223451327433628', 'stddev': '1.6636657241010946', 'min': '0', 'max': '5', 'mean_bias': 0.010146616973418524, 'stddev_bias': 0.011224253310953242, 'score': 99.4657282428907}, 'X_65': {'count': '904', 'mean': '-0.055106964803761045', 'stddev': '1.0534229992909745', 'min': '-0.0091515934', 'max': '2.7959132999999996', 'mean_bias': 1, 'stddev_bias': 0.0017590619444204994, 'score': 74.95602345138948}, 'X_66': {'count': '904', 'mean': '0.0154722464362832', 'stddev': '0.9011814486726939', 'min': '-0.0010881716', 'max': '3.4451442', 'mean_bias': 0, 'stddev_bias': 0.0111470073099936, 'score': 99.72132481725015}, 'X_67': {'count': '904', 'mean': '0.02359740011991152', 'stddev': '1.0233312829221162', 'min': '-0.0087792385', 'max': '3.1476658', 'mean_bias': 0, 'stddev_bias': 0.0046492459151833134, 'score': 99.88376885212041}, 'X_68': {'count': '904', 'mean': '0.0154722464362832', 'stddev': '0.9011814486726939', 'min': '-0.0010881716', 'max': '3.4451442', 'mean_bias': 0, 'stddev_bias': 0.0111470073099936, 'score': 99.72132481725015}, 'X_69': {'count': '904', 'mean': '0.06548671605884951', 'stddev': '1.312024328523548', 'min': '-0.0021861139000000003', 'max': '3.6726089', 'mean_bias': 1, 'stddev_bias': 0.001534461431997136, 'score': 74.96163846420008}, 'X_70': {'count': '904', 'mean': '0.025813362764823076', 'stddev': '0.576439345792362', 'min': '-0.004300643900000001', 'max': '1.7105518', 'mean_bias': 1, 'stddev_bias': 0.00596964928804255, 'score': 74.85075876779894}, 'X_71': {'count': '904', 'mean': '-0.02717241083860621', 'stddev': '0.518625339531382', 'min': '-0.0067626397', 'max': '1.3715912000000001', 'mean_bias': 1, 'stddev_bias': 0.0018928843247073286, 'score': 74.95267789188232}, 'X_72': {'count': '904', 'mean': '0.0053688892176992084', 
'stddev': '1.394466931688634', 'min': '-0.020871571999999998', 'max': '5.016143700000001', 'mean_bias': 0, 'stddev_bias': 0.0032892510398994735, 'score': 99.91776872400251}, 'X_73': {'count': '904', 'mean': '-0.012947893357673703', 'stddev': '1.0149361576792528', 'min': '-0.0066261777', 'max': '3.6000202000000003', 'mean_bias': 1, 'stddev_bias': 0.027163004304717218, 'score': 74.32092489238207}, 'X_74': {'count': '904', 'mean': '0.02708510335257746', 'stddev': '0.8613958989168103', 'min': '-0.0020981927', 'max': '2.6094707', 'mean_bias': 1, 'stddev_bias': 0.009626997520940987, 'score': 74.75932506197648}, 'X_75': {'count': '904', 'mean': '0.035850764773230036', 'stddev': '1.3045602798775962', 'min': '-0.0033128438', 'max': '3.9027361000000003', 'mean_bias': 1, 'stddev_bias': 0.009385353375707313, 'score': 74.76536616560732}, 'X_76': {'count': '904', 'mean': '-0.03254219391780966', 'stddev': '0.9366452997925659', 'min': '-0.0039001177', 'max': '2.4984895000000003', 'mean_bias': 0.6835761265752802, 'stddev_bias': 0.04353382360161276, 'score': 81.82225124557768}, 'X_77': {'count': '904', 'mean': '0.02708510335257746', 'stddev': '0.8613958989168103', 'min': '-0.0020981927', 'max': '2.6094707', 'mean_bias': 1, 'stddev_bias': 0.009626997520940987, 'score': 74.75932506197648}, 'X_78': {'count': '904', 'mean': '-0.045639820784557514', 'stddev': '1.0972453217029485', 'min': '-0.00073222587', 'max': '3.4580937999999994', 'mean_bias': 0.6162271168291866, 'stddev_bias': 0.022407770131937572, 'score': 84.0341278259719}, 'X_79': {'count': '904', 'mean': '0.04030136749845136', 'stddev': '1.1659211363745208', 'min': '-0.0075207629', 'max': '4.3657891', 'mean_bias': 0.5056827590526853, 'stddev_bias': 0.02017891805915148, 'score': 86.85345807220408}, 'X_80': {'count': '904', 'mean': '0.06548671605884951', 'stddev': '1.312024328523548', 'min': '-0.0021861139000000003', 'max': '3.6726089', 'mean_bias': 1, 'stddev_bias': 0.001534461431997136, 'score': 74.96163846420008}, 'X_81': 
{'count': '904', 'mean': '0.0149358733349558', 'stddev': '0.9601538584065961', 'min': '-0.0012475341', 'max': '3.2756502', 'mean_bias': 1, 'stddev_bias': 0.04257904643913242, 'score': 73.93552383902168}, 'X_82': {'count': '904', 'mean': '0.06499539622367251', 'stddev': '1.2105697696260267', 'min': '-0.0030899136', 'max': '3.0497526', 'mean_bias': 1, 'stddev_bias': 0.00436141065302579, 'score': 74.89096473367437}, 'X_83': {'count': '904', 'mean': '0.04622053365309734', 'stddev': '0.9650162678717619', 'min': '-0.0054295689', 'max': '2.8977452', 'mean_bias': 0.35472471890248414, 'stddev_bias': 0.008813451236634354, 'score': 90.91154574652204}, 'X_84': {'count': '904', 'mean': '0.03815544244723449', 'stddev': '0.9056553195460745', 'min': '-0.0073042275', 'max': '2.7272644', 'mean_bias': 1, 'stddev_bias': 0.0074107434682380855, 'score': 74.81473141329404}, 'X_85': {'count': '904', 'mean': '-0.055106964803761045', 'stddev': '1.0534229992909745', 'min': '-0.0091515934', 'max': '2.7959132999999996', 'mean_bias': 1, 'stddev_bias': 0.0017590619444204994, 'score': 74.95602345138948}, 'X_86': {'count': '904', 'mean': '-0.07209106821515489', 'stddev': '1.3430658770066557', 'min': '-0.007215624499999999', 'max': '3.7208654', 'mean_bias': 1, 'stddev_bias': 0.0043319254432601885, 'score': 74.89170186391848}, 'X_87': {'count': '904', 'mean': '0.01876433489015487', 'stddev': '1.028202925370301', 'min': '-0.0035480724', 'max': '3.6513329', 'mean_bias': 0.7232579559385736, 'stddev_bias': 0.01283964275899737, 'score': 81.59756003256074}, 'X_88': {'count': '904', 'mean': '-0.024487125154668114', 'stddev': '0.7665771398697134', 'min': '-0.0007652094199999999', 'max': '2.8832355', 'mean_bias': 1, 'stddev_bias': 0.009623113170479503, 'score': 74.759422170738}, 'X_89': {'count': '904', 'mean': '0.01766909315497786', 'stddev': '0.8029150351036136', 'min': '-0.0032088744', 'max': '3.0907927', 'mean_bias': 0, 'stddev_bias': 0.013932070079433652, 'score': 99.65169824801416}, 'X_90': {'count': 
'904', 'mean': '-0.013203371167123867', 'stddev': '0.9452522282015366', 'min': '-0.00028950049', 'max': '2.9261272000000003', 'mean_bias': 1, 'stddev_bias': 0.04364958664171923, 'score': 73.90876033395702}, 'X_91': {'count': '904', 'mean': '0.06548671605884951', 'stddev': '1.312024328523548', 'min': '-0.0021861139000000003', 'max': '3.6726089', 'mean_bias': 1, 'stddev_bias': 0.001534461431997136, 'score': 74.96163846420008}, 'X_92': {'count': '904', 'mean': '-0.021959127318234503', 'stddev': '0.9654200470977276', 'min': '-0.0062116785', 'max': '3.266212', 'mean_bias': 0, 'stddev_bias': 0.02759313499820034, 'score': 99.310171625045}, 'X_93': {'count': '904', 'mean': '-0.07209106821515489', 'stddev': '1.3430658770066557', 'min': '-0.007215624499999999', 'max': '3.7208654', 'mean_bias': 1, 'stddev_bias': 0.0043319254432601885, 'score': 74.89170186391848}, 'X_94': {'count': '904', 'mean': '-0.02402704079137166', 'stddev': '1.022112535896386', 'min': '-0.0087255505', 'max': '2.9007362999999997', 'mean_bias': 1, 'stddev_bias': 0.004502973368408115, 'score': 74.88742566578979}, 'X_95': {'count': '904', 'mean': '-0.002035233520022116', 'stddev': '1.0056110787165975', 'min': '-0.0049381842', 'max': '3.1775257999999997', 'mean_bias': 0, 'stddev_bias': 0.011927896995753917, 'score': 99.70180257510614}, 'X_96': {'count': '904', 'mean': '0.06499539622367251', 'stddev': '1.2105697696260267', 'min': '-0.0030899136', 'max': '3.0497526', 'mean_bias': 1, 'stddev_bias': 0.00436141065302579, 'score': 74.89096473367437}, 'X_97': {'count': '904', 'mean': '-0.03509833923761053', 'stddev': '1.4849229085451956', 'min': '-0.0012198847', 'max': '5.6920133', 'mean_bias': 1, 'stddev_bias': 0.008739329196926236, 'score': 74.78151677007683}, 'X_98': {'count': '904', 'mean': '0.02708510335257746', 'stddev': '0.8613958989168103', 'min': '-0.0020981927', 'max': '2.6094707', 'mean_bias': 1, 'stddev_bias': 0.009626997520940987, 'score': 74.75932506197648}, 'X_99': {'count': '904', 'mean': 
'-0.07046490533683618', 'stddev': '1.0261487818249295', 'min': '-0.00151381', 'max': '3.1554324', 'mean_bias': 0.39656023602369167, 'stddev_bias': 0.002359159013224285, 'score': 90.02701512407711}, 'y': {'count': '904', 'mean': '0.4856194690265487', 'stddev': '0.5000698213678834', 'min': '0', 'max': '1', 'mean_bias': 0, 'stddev_bias': 0.00021774086362845294, 'score': 99.9945564784093}}\",\n",
       "  'request_data': \"{'method': 'compare', 'type': 'csv', 'compare_job_id': 10188, 'path': 'hdfs://localhost:9000/dataset/ten_million_top1k.csv-sampled-1618921437.4498558', 'source_path': 'hdfs://localhost:9000/dataset/ten_million_top1k.csv', 'file_type': 'csv', 'with_header': True}\"}}"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Look up evaluation job 50072 and show the returned details as a dict.\n",
    "evaluation_job_id = 50072\n",
    "evaluation_job_data = submitter.get_evaluation_job_details(job_id=evaluation_job_id)\n",
    "evaluation_job_data.to_dict()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "upset-feeding",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th># id</th>\n",
       "      <th>X_0</th>\n",
       "      <th>X_1</th>\n",
       "      <th>X_10</th>\n",
       "      <th>X_11</th>\n",
       "      <th>X_12</th>\n",
       "      <th>X_13</th>\n",
       "      <th>X_14</th>\n",
       "      <th>X_15</th>\n",
       "      <th>X_16</th>\n",
       "      <th>...</th>\n",
       "      <th>X_91</th>\n",
       "      <th>X_92</th>\n",
       "      <th>X_93</th>\n",
       "      <th>X_94</th>\n",
       "      <th>X_95</th>\n",
       "      <th>X_96</th>\n",
       "      <th>X_97</th>\n",
       "      <th>X_98</th>\n",
       "      <th>X_99</th>\n",
       "      <th>y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>904</td>\n",
       "      <td>904</td>\n",
       "      <td>904</td>\n",
       "      <td>904</td>\n",
       "      <td>904</td>\n",
       "      <td>904</td>\n",
       "      <td>904</td>\n",
       "      <td>904</td>\n",
       "      <td>904</td>\n",
       "      <td>904</td>\n",
       "      <td>...</td>\n",
       "      <td>904</td>\n",
       "      <td>904</td>\n",
       "      <td>904</td>\n",
       "      <td>904</td>\n",
       "      <td>904</td>\n",
       "      <td>904</td>\n",
       "      <td>904</td>\n",
       "      <td>904</td>\n",
       "      <td>904</td>\n",
       "      <td>904</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>502.4845132743363</td>\n",
       "      <td>2.043141592920354</td>\n",
       "      <td>3.004424778761062</td>\n",
       "      <td>2.4269911504424777</td>\n",
       "      <td>3.140486725663717</td>\n",
       "      <td>3.1128318584070795</td>\n",
       "      <td>2.1626106194690267</td>\n",
       "      <td>2.35287610619469</td>\n",
       "      <td>-0.034070674752654816</td>\n",
       "      <td>-0.03509833923761053</td>\n",
       "      <td>...</td>\n",
       "      <td>0.06548671605884951</td>\n",
       "      <td>-0.021959127318234503</td>\n",
       "      <td>-0.07209106821515489</td>\n",
       "      <td>-0.02402704079137166</td>\n",
       "      <td>-0.002035233520022116</td>\n",
       "      <td>0.06499539622367251</td>\n",
       "      <td>-0.03509833923761053</td>\n",
       "      <td>0.02708510335257746</td>\n",
       "      <td>-0.07046490533683618</td>\n",
       "      <td>0.4856194690265487</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>stddev</th>\n",
       "      <td>297.45554733302066</td>\n",
       "      <td>1.8639547253150965</td>\n",
       "      <td>2.3479173167596703</td>\n",
       "      <td>1.8072027098455483</td>\n",
       "      <td>1.7769155233809604</td>\n",
       "      <td>2.112686760816003</td>\n",
       "      <td>1.884569339002512</td>\n",
       "      <td>1.6099195973003593</td>\n",
       "      <td>0.9821741795570659</td>\n",
       "      <td>1.4849229085451956</td>\n",
       "      <td>...</td>\n",
       "      <td>1.312024328523548</td>\n",
       "      <td>0.9654200470977276</td>\n",
       "      <td>1.3430658770066557</td>\n",
       "      <td>1.022112535896386</td>\n",
       "      <td>1.0056110787165975</td>\n",
       "      <td>1.2105697696260267</td>\n",
       "      <td>1.4849229085451956</td>\n",
       "      <td>0.8613958989168103</td>\n",
       "      <td>1.0261487818249295</td>\n",
       "      <td>0.5000698213678834</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>-0.018425233</td>\n",
       "      <td>-0.0012198847</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.0021861139000000003</td>\n",
       "      <td>-0.0062116785</td>\n",
       "      <td>-0.007215624499999999</td>\n",
       "      <td>-0.0087255505</td>\n",
       "      <td>-0.0049381842</td>\n",
       "      <td>-0.0030899136</td>\n",
       "      <td>-0.0012198847</td>\n",
       "      <td>-0.0020981927</td>\n",
       "      <td>-0.00151381</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>998</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>2.5914002</td>\n",
       "      <td>5.6920133</td>\n",
       "      <td>...</td>\n",
       "      <td>3.6726089</td>\n",
       "      <td>3.266212</td>\n",
       "      <td>3.7208654</td>\n",
       "      <td>2.9007362999999997</td>\n",
       "      <td>3.1775257999999997</td>\n",
       "      <td>3.0497526</td>\n",
       "      <td>5.6920133</td>\n",
       "      <td>2.6094707</td>\n",
       "      <td>3.1554324</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean_bias</th>\n",
       "      <td>0.005975</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0152656</td>\n",
       "      <td>0.0388154</td>\n",
       "      <td>0.00592144</td>\n",
       "      <td>0.00967624</td>\n",
       "      <td>0.0232111</td>\n",
       "      <td>0.0175883</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.39656</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>stddev_bias</th>\n",
       "      <td>0.0299014</td>\n",
       "      <td>0.0132372</td>\n",
       "      <td>0.0193258</td>\n",
       "      <td>0.0188627</td>\n",
       "      <td>0.0108182</td>\n",
       "      <td>0.0178321</td>\n",
       "      <td>0.0189183</td>\n",
       "      <td>0.0183096</td>\n",
       "      <td>0.0335605</td>\n",
       "      <td>0.00873933</td>\n",
       "      <td>...</td>\n",
       "      <td>0.00153446</td>\n",
       "      <td>0.0275931</td>\n",
       "      <td>0.00433193</td>\n",
       "      <td>0.00450297</td>\n",
       "      <td>0.0119279</td>\n",
       "      <td>0.00436141</td>\n",
       "      <td>0.00873933</td>\n",
       "      <td>0.009627</td>\n",
       "      <td>0.00235916</td>\n",
       "      <td>0.000217741</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>score</th>\n",
       "      <td>99.1031</td>\n",
       "      <td>99.6691</td>\n",
       "      <td>99.1352</td>\n",
       "      <td>98.558</td>\n",
       "      <td>99.5815</td>\n",
       "      <td>99.3123</td>\n",
       "      <td>98.9468</td>\n",
       "      <td>99.1026</td>\n",
       "      <td>74.161</td>\n",
       "      <td>74.7815</td>\n",
       "      <td>...</td>\n",
       "      <td>74.9616</td>\n",
       "      <td>99.3102</td>\n",
       "      <td>74.8917</td>\n",
       "      <td>74.8874</td>\n",
       "      <td>99.7018</td>\n",
       "      <td>74.891</td>\n",
       "      <td>74.7815</td>\n",
       "      <td>74.7593</td>\n",
       "      <td>90.027</td>\n",
       "      <td>99.9946</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8 rows × 102 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                           # id                 X_0                 X_1  \\\n",
       "count                       904                 904                 904   \n",
       "mean          502.4845132743363   2.043141592920354   3.004424778761062   \n",
       "stddev       297.45554733302066  1.8639547253150965  2.3479173167596703   \n",
       "min                           1                   0                   0   \n",
       "max                         998                   6                   6   \n",
       "mean_bias              0.005975                   0           0.0152656   \n",
       "stddev_bias           0.0299014           0.0132372           0.0193258   \n",
       "score                   99.1031             99.6691             99.1352   \n",
       "\n",
       "                           X_10                X_11                X_12  \\\n",
       "count                       904                 904                 904   \n",
       "mean         2.4269911504424777   3.140486725663717  3.1128318584070795   \n",
       "stddev       1.8072027098455483  1.7769155233809604   2.112686760816003   \n",
       "min                           0                   1                   0   \n",
       "max                           5                   6                   6   \n",
       "mean_bias             0.0388154          0.00592144          0.00967624   \n",
       "stddev_bias           0.0188627           0.0108182           0.0178321   \n",
       "score                    98.558             99.5815             99.3123   \n",
       "\n",
       "                           X_13                X_14                   X_15  \\\n",
       "count                       904                 904                    904   \n",
       "mean         2.1626106194690267    2.35287610619469  -0.034070674752654816   \n",
       "stddev        1.884569339002512  1.6099195973003593     0.9821741795570659   \n",
       "min                           0                   0           -0.018425233   \n",
       "max                           6                   5              2.5914002   \n",
       "mean_bias             0.0232111           0.0175883                      1   \n",
       "stddev_bias           0.0189183           0.0183096              0.0335605   \n",
       "score                   98.9468             99.1026                 74.161   \n",
       "\n",
       "                             X_16  ...                    X_91  \\\n",
       "count                         904  ...                     904   \n",
       "mean         -0.03509833923761053  ...     0.06548671605884951   \n",
       "stddev         1.4849229085451956  ...       1.312024328523548   \n",
       "min                 -0.0012198847  ...  -0.0021861139000000003   \n",
       "max                     5.6920133  ...               3.6726089   \n",
       "mean_bias                       1  ...                       1   \n",
       "stddev_bias            0.00873933  ...              0.00153446   \n",
       "score                     74.7815  ...                 74.9616   \n",
       "\n",
       "                              X_92                   X_93  \\\n",
       "count                          904                    904   \n",
       "mean         -0.021959127318234503   -0.07209106821515489   \n",
       "stddev          0.9654200470977276     1.3430658770066557   \n",
       "min                  -0.0062116785  -0.007215624499999999   \n",
       "max                       3.266212              3.7208654   \n",
       "mean_bias                        0                      1   \n",
       "stddev_bias              0.0275931             0.00433193   \n",
       "score                      99.3102                74.8917   \n",
       "\n",
       "                             X_94                   X_95                 X_96  \\\n",
       "count                         904                    904                  904   \n",
       "mean         -0.02402704079137166  -0.002035233520022116  0.06499539622367251   \n",
       "stddev          1.022112535896386     1.0056110787165975   1.2105697696260267   \n",
       "min                 -0.0087255505          -0.0049381842        -0.0030899136   \n",
       "max            2.9007362999999997     3.1775257999999997            3.0497526   \n",
       "mean_bias                       1                      0                    1   \n",
       "stddev_bias            0.00450297              0.0119279           0.00436141   \n",
       "score                     74.8874                99.7018               74.891   \n",
       "\n",
       "                             X_97                 X_98                  X_99  \\\n",
       "count                         904                  904                   904   \n",
       "mean         -0.03509833923761053  0.02708510335257746  -0.07046490533683618   \n",
       "stddev         1.4849229085451956   0.8613958989168103    1.0261487818249295   \n",
       "min                 -0.0012198847        -0.0020981927           -0.00151381   \n",
       "max                     5.6920133            2.6094707             3.1554324   \n",
       "mean_bias                       1                    1               0.39656   \n",
       "stddev_bias            0.00873933             0.009627            0.00235916   \n",
       "score                     74.7815              74.7593                90.027   \n",
       "\n",
       "                              y  \n",
       "count                       904  \n",
       "mean         0.4856194690265487  \n",
       "stddev       0.5000698213678834  \n",
       "min                           0  \n",
       "max                           1  \n",
       "mean_bias                     0  \n",
       "stddev_bias         0.000217741  \n",
       "score                   99.9946  \n",
       "\n",
       "[8 rows x 102 columns]"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same evaluation result, displayed as a table instead of a dict.\n",
    "evaluation_job_frame = evaluation_job_data.to_pandas()\n",
    "evaluation_job_frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "destroyed-election",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DataFrame[# id: string, X_0: string, X_1: string, X_2: string, X_3: string, X_4: string, X_5: string, X_6: string, X_7: string, X_8: string, X_9: string, X_10: string, X_11: string, X_12: string, X_13: string, X_14: string, X_15: string, X_16: string, X_17: string, X_18: string, X_19: string, X_20: string, X_21: string, X_22: string, X_23: string, X_24: string, X_25: string, X_26: string, X_27: string, X_28: string, X_29: string, X_30: string, X_31: string, X_32: string, X_33: string, X_34: string, X_35: string, X_36: string, X_37: string, X_38: string, X_39: string, X_40: string, X_41: string, X_42: string, X_43: string, X_44: string, X_45: string, X_46: string, X_47: string, X_48: string, X_49: string, X_50: string, X_51: string, X_52: string, X_53: string, X_54: string, X_55: string, X_56: string, X_57: string, X_58: string, X_59: string, X_60: string, X_61: string, X_62: string, X_63: string, X_64: string, X_65: string, X_66: string, X_67: string, X_68: string, X_69: string, X_70: string, X_71: string, X_72: string, X_73: string, X_74: string, X_75: string, X_76: string, X_77: string, X_78: string, X_79: string, X_80: string, X_81: string, X_82: string, X_83: string, X_84: string, X_85: string, X_86: string, X_87: string, X_88: string, X_89: string, X_90: string, X_91: string, X_92: string, X_93: string, X_94: string, X_95: string, X_96: string, X_97: string, X_98: string, X_99: string, y: string]"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pyspark.sql import SparkSession\n",
    "from sparksampling.config import SPARK_CONF\n",
    "\n",
    "# getOrCreate() reuses an already running session and only builds a new one\n",
    "# from the project configuration when none exists.\n",
    "conf = SPARK_CONF\n",
    "spark = SparkSession.builder.config(conf=conf).getOrCreate()\n",
    "\n",
    "# Read the source dataset with its header row. No schema is inferred here,\n",
    "# so every column comes back typed as string.\n",
    "dataset_path = \"hdfs://localhost:9000/dataset/ten_million_top1k.csv\"\n",
    "df = spark.read.option(\"header\", True).csv(dataset_path)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "detected-dayton",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---+\n",
      "|  y|\n",
      "+---+\n",
      "|  0|\n",
      "|  0|\n",
      "|  0|\n",
      "|  1|\n",
      "|  0|\n",
      "|  1|\n",
      "|  0|\n",
      "|  0|\n",
      "|  0|\n",
      "|  1|\n",
      "|  1|\n",
      "|  1|\n",
      "|  0|\n",
      "|  0|\n",
      "|  1|\n",
      "|  1|\n",
      "|  1|\n",
      "|  1|\n",
      "|  1|\n",
      "|  1|\n",
      "+---+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Peek at the `y` column; show() prints the first 20 rows by default.\n",
    "df.select(\"y\").show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "conceptual-elizabeth",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---+---+---+---+---+---+---+---+---+---+----+----+----+----+----+--------------------+--------------------+-------------------+-------------------+-------------------+-------------------+-------------------+--------------------+------------------+------------------+-----------+-------------------+-------------------+-------------------+-------------------+------------------+-------------------+-------------------+--------------------+-------------------+-------------------+-----------+-------------------+------------+-------------------+------------------+-------------------+--------------------+-------------------+-------------------+-----------+--------------------+------------------+-------------------+-------------------+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--------------------+--------------------+-------------------+--------------------+--------------------+-------------------+--------------------+-------------------+--------------------+------------------+-------------------+--------------------+------------------+-------------------+-------------------+--------------------+--------------------+-------------------+--------------------+--------------------+--------------------+-------------------+-------------------+-----------+-------------------+-------------------+--------------------+--------------------+-------------------+------------------+------------------+-------------------+--------------------+------------------+------------------+\n",
      "|X_0|X_1|X_2|X_3|X_4|X_5|X_6|X_7|X_8|X_9|X_10|X_11|X_12|X_13|X_14|                X_15|                X_16|               X_17|               X_18|               X_19|               X_20|               X_21|                X_22|              X_23|              X_24|       X_25|               X_26|               X_27|               X_28|               X_29|              X_30|               X_31|               X_32|                X_33|               X_34|               X_35|       X_36|               X_37|        X_38|               X_39|              X_40|               X_41|                X_42|               X_43|               X_44|       X_45|                X_46|              X_47|               X_48|               X_49|X_50|X_51|X_52|X_53|X_54|X_55|X_56|X_57|X_58|X_59|X_60|X_61|X_62|X_63|X_64|                X_65|                X_66|               X_67|                X_68|                X_69|               X_70|                X_71|               X_72|                X_73|              X_74|               X_75|                X_76|              X_77|               X_78|               X_79|                X_80|                X_81|               X_82|                X_83|                X_84|                X_85|               X_86|               X_87|       X_88|               X_89|               X_90|                X_91|                X_92|               X_93|              X_94|              X_95|               X_96|                X_97|              X_98|              X_99|\n",
      "+---+---+---+---+---+---+---+---+---+---+----+----+----+----+----+--------------------+--------------------+-------------------+-------------------+-------------------+-------------------+-------------------+--------------------+------------------+------------------+-----------+-------------------+-------------------+-------------------+-------------------+------------------+-------------------+-------------------+--------------------+-------------------+-------------------+-----------+-------------------+------------+-------------------+------------------+-------------------+--------------------+-------------------+-------------------+-----------+--------------------+------------------+-------------------+-------------------+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--------------------+--------------------+-------------------+--------------------+--------------------+-------------------+--------------------+-------------------+--------------------+------------------+-------------------+--------------------+------------------+-------------------+-------------------+--------------------+--------------------+-------------------+--------------------+--------------------+--------------------+-------------------+-------------------+-----------+-------------------+-------------------+--------------------+--------------------+-------------------+------------------+------------------+-------------------+--------------------+------------------+------------------+\n",
      "|  6|  5|  4|  5|  2|  2|  3|  3|  6|  5|   1|   1|   6|   3|   5|         -0.99513124|          -3.6292718|          2.4204502|        -0.97570679|        -0.10658287|-2.6091914000000003|        -0.50468426|         -0.40461826|        0.76365848|        -0.9612525|-0.43076305|         0.21113412|         -1.6542911|        -0.70296259|         0.71313918|        0.10048822|-1.2143941999999999|        -0.97570679|          0.57761198|           1.844862| 1.2231096000000001| -3.2379518|         0.19014047|  -1.9326177|         -2.6872125|         1.5403452|        -0.50468426|           -2.228743|         0.40144393|          1.3882938|-0.92881286|         -0.51232563|         1.0622768|        -0.97570679|         -1.5348242|   3|   1|   0|   5|   3|   2|   3|   6|   4|   4|   5|   3|   3|   4|   3|           -2.228743|          -1.2733956|         0.52975239|          -1.2733956|  2.9516687000000004|          1.3882938| -1.0942843999999998|-2.6091914000000003|         0.039236337|2.1533357000000004|          3.2339791|         -0.29600692|2.1533357000000004|         0.19014047|        -0.70296259|  2.9516687000000004|          0.71730253|          2.4204502|  0.8193011999999998|  2.2221227999999997|           -2.228743|         -2.6872125|          -1.119919| -1.9173942|        -0.97570679|         -1.7131415|  2.9516687000000004|          -1.6082825|         -2.6872125|         -0.809006|        0.67904304|          2.4204502|          -3.6292718|2.1533357000000004|        0.21443366|\n",
      "|  6|  2|  0|  4|  5|  2|  6|  3|  6|  0|   5|   1|   4|   2|   1|         -0.16656857|          0.61825443|         0.65639777| 0.8627676999999999|          2.4257922|          1.2309041|         -1.7445764|            2.624782|       -0.61661435|        -1.0476028| 0.94202771|         -0.6085349|        -0.40886915| 1.3301321000000002|         0.25909505|        0.35601546|-1.0189006999999999| 0.8627676999999999|          0.11420586|        -0.91404927|          1.3910996| 0.28547551|         -1.2525113| -0.84352648|        -0.72700356|        0.58363041|         -1.7445764|         -0.48258383|        -0.13750778|        0.081833766|  1.1217928|         -0.33013971|        -1.4597562| 0.8627676999999999|         0.72914825|   3|   5|   0|   2|   0|   2|   2|   0|   4|   3|   2|   2|   1|   1|   3|         -0.48258383|          0.93100776|          1.4746455|          0.93100776|          0.44650815|        0.081833766|         -0.23991306|          1.2309041|        -0.076009164|       -0.20594382|        -0.43333899|          0.41054772|       -0.20594382|         -1.2525113| 1.3301321000000002|          0.44650815|         -0.20745641|         0.65639777|          0.51757123|0.049731637999999995|         -0.48258383|        -0.72700356|          2.9264381| 0.17387676| 0.8627676999999999|          1.2232214|          0.44650815|          0.65125066|        -0.72700356|          2.328492|         0.6974745|         0.65639777|          0.61825443|       -0.20594382|1.0061347999999999|\n",
      "|  4|  2|  0|  4|  6|  3|  3|  3|  6|  0|   2|   1|   2|   1|   4|          0.76313938|         -0.35092328| 1.2323540000000002|         0.55631855|          0.5610991|         0.48232071|-1.7587453999999998|  0.7147380999999999|        0.95239153|        0.37397064| 0.76976473|         -0.1967545|-2.3525424999999998|          1.0626648|         0.42446089|        0.63919844|-1.2624389999999999|         0.55631855|           1.6636232|         0.60920458|         -2.0346345|-0.56166941|         -1.1213488| -0.93978549|-1.3665513999999999|        0.36683096|-1.7587453999999998| -1.0209488000000002|        -0.75183377|         0.43159717|-0.32089286|         -0.82744497|        -1.0918285|         0.55631855|         0.90783157|   3|   5|   1|   1|   0|   4|   6|   3|   4|   3|   3|   2|   3|   0|   3| -1.0209488000000002|          0.54390757|-1.5241122999999999|          0.54390757|           1.1721897|         0.43159717|         -0.50404772|         0.48232071|         -0.48015287|        0.35849345|         0.42253148|  2.5982093999999996|        0.35849345|         -1.1213488|          1.0626648|           1.1721897|          0.64666557| 1.2323540000000002|  1.3712243999999998|          0.61492116| -1.0209488000000002|-1.3665513999999999|        -0.62407587|-0.32808898|         0.55631855|          1.0151651|           1.1721897|          0.74163043|-1.3665513999999999|       -0.65623288|1.0582458000000001| 1.2323540000000002|         -0.35092328|        0.35849345|        0.91475412|\n",
      "|  1|  1|  0|  0|  4|  4|  3|  1|  5|  1|   2|   1|   1|   2|   1|         -0.30378224|           3.2961377|-3.1452332999999997|          0.2689557|         0.72353684| 1.6728718999999996|          2.0803404|          -1.5936442|       -0.28670686|1.0352299999999999|-0.37845171| 1.0288969000000001|          1.0445704|        -0.43403571|        -0.98489694|1.1491063999999998| 2.1890370999999997|          0.2689557|          0.57012508|         -2.2213295|         0.11464969|  3.1765333|         0.90841534|  0.38718835|          3.4903362|        0.78372477|          2.0803404|           2.7881763|        -0.77356064|-1.5429236000000002| 0.84282718|          0.13311064|        0.16832247|          0.2689557|        -0.57010517|   5|   1|   0|   3|   2|   0|   4|   3|   3|   4|   4|   4|   4|   0|   1|           2.7881763|          0.52378836|          1.3186083|          0.52378836|           -3.521899|-1.5429236000000002|  1.3715912000000001| 1.6728718999999996|  1.5789571000000002|        -2.0982302|-3.0413092999999995|-0.05874528099999...|        -2.0982302|         0.90841534|        -0.43403571|           -3.521899|          0.50074265|-3.1452332999999997|          0.08996338|          -2.3976306|           2.7881763|          3.4903362|         0.15819859|  1.8767458|          0.2689557|          -1.717596|           -3.521899|          -0.4430557|          3.4903362|        -2.5527801|        0.40841621|-3.1452332999999997|           3.2961377|        -2.0982302|1.6601564999999998|\n",
      "|  4|  6|  3|  5|  2|  2|  6|  1|  1|  0|   5|   6|   2|   2|   1|          0.10245694|          -1.3200329|         0.86601554|        -0.36423309|          1.1408105|        -0.95956785|        -0.15898948|            1.494116|       -0.56183671|        -1.0759878|-0.16833722|         0.60374344|        -0.67153774|        -0.27192867|         0.25427289|       -0.18361762|        -0.42524256|        -0.36423309|-0.03209193299999...|         0.29698547|         0.14487062| -1.1741302|        0.085536585| -0.95606563|        -0.96148404|       -0.36746842|        -0.15898948| -0.7990598000000001|        -0.67247292|         0.50067466|-0.53631698|          0.69928747|        0.40353625|        -0.36423309|         -1.1566347|   1|   5|   6|   2|   3|   4|   2|   3|   0|   1|   2|   3|   1|   1|   3| -0.7990598000000001|         -0.47274327|         0.53750309|         -0.47274327|  1.0608322000000001|         0.50067466|         -0.39228831|        -0.95956785|          0.51683834|        0.78104806|          1.1746793| -0.8218270000000001|        0.78104806|        0.085536585|        -0.27192867|  1.0608322000000001|          0.44266564|         0.86601554|           2.2438306|           0.8024777| -0.7990598000000001|        -0.96148404|        -0.24356024| -0.6953409|        -0.36423309|-2.3333616000000004|  1.0608322000000001|           -1.199539|        -0.96148404|         2.1445021|        0.72308551|         0.86601554|          -1.3200329|        0.78104806|        0.15986617|\n",
      "|  2|  4|  2|  0|  4|  5|  4|  6|  1|  6|   2|   4|   6|   4|   2|         -0.96405204|           0.7283759|         0.16577893|         0.62047423|         0.18486491| 1.0030947000000001|         -1.0147099|0.055389109000000006|       -0.37081026|1.4775243999999998|  0.6160232|        -0.29975372|         -0.6846797|         0.87899116|        0.088897695|0.9209318999999999|        -0.50357296|         0.62047423|          0.68895459|         0.73692803|         -2.7817731| 0.48760164|        -0.78200821|  0.86012879|        -0.18299075|       -0.33592322|         -1.0147099|-0.07837783599999999|-1.2354258999999999|       -0.084551405| 0.57489029|          0.69135146|       -0.99281242|         0.62047423| 0.6033029000000001|   4|   4|   0|   3|   3|   2|   3|   1|   0|   5|   1|   3|   2|   1|   1|-0.07837783599999999|          0.69090486|        -0.40442941|          0.69090486|-0.01362339699999...|       -0.084551405|        -0.040293516| 1.0030947000000001|         -0.83549044|       -0.33408152|        -0.57736208|         -0.98165852|       -0.33408152|        -0.78200821|         0.87899116|-0.01362339699999...|         -0.29905857|         0.16577893|          0.10806438|         -0.18486996|-0.07837783599999999|        -0.18299075|         0.51200176| 0.29168339|         0.62047423|        -0.15262214|-0.01362339699999...|          0.07893007|        -0.18299075|1.2058451000000001|          1.054715|         0.16577893|           0.7283759|       -0.33408152|        0.21855168|\n",
      "|  0|  5|  2|  4|  6|  2|  3|  5|  2|  1|   5|   2|   2|   0|   4|        -0.066977683|-0.09970291199999999|         0.98502995|         0.57186076|          1.2787298|         0.60422513|-1.5871627000000001| -1.7257403999999998|       -0.34621787|        0.57247962| 0.73473914|          2.8336841|         -1.3966087| 1.0209709999999999|0.34668720000000003|        -1.8843186|-1.0868754999999999|         0.57186076|          0.69173137|        0.033455481| 0.5986300999999999| -0.3176712|         -1.0434301|  -1.9608567|          -1.092099|        -0.8585043|-1.5871627000000001|         -0.80230775|         0.98231416|         0.31173554| 0.48519311|          0.57660583|        -1.0698997|         0.57186076|-1.4999676000000002|   4|   3|   1|   5|   2|   1|   6|   5|   4|   2|   5|   2|   0|   0|   5|         -0.80230775|          0.57874753|         0.37210991|          0.57874753|           0.8970071|         0.31173554|         -0.39647598|         0.60422513|0.003694994299999...|        0.19742867|         0.18989659|           1.4970997|        0.19742867|         -1.0434301| 1.0209709999999999|           0.8970071|          0.58293109|         0.98502995|          0.55483748|          0.42913433|         -0.80230775|          -1.092099| 1.4093403999999998|-0.18396281|         0.57186076|         -2.9724204|           0.8970071|         -0.75964445|          -1.092099|       -0.53496937|       -0.38415834|         0.98502995|-0.09970291199999999|        0.19742867|      -0.078188374|\n",
      "|  0|  2|  2|  4|  2|  4|  2|  0|  4|  0|   1|   2|   6|   0|   1| -1.0104845999999998|         0.033119897|          1.0354242|         0.69814906|        -0.33459009|         0.80198164|         -1.8067369|         0.011908246|         1.0825361|       -0.78835878| 0.86346265|         -0.2123678|         -2.0392121|          1.2040955|         0.37007852|         1.0429217|-1.2018311000000002|         0.69814906|          -1.2195288|        -0.39219495|         0.50769932|-0.23377554|-1.2090530000000002|-0.062359222|         -1.1478214|       -0.57771916|         -1.8067369|         -0.83286848|         -1.0716919|         0.30232317| 0.57337455|         -0.15683231|        -1.2750708|         0.69814906|        -0.88010037|   4|   0|   3|   1|   1|   2|   2|   3|   0|   4|   3|   6|   6|   4|   1|         -0.83286848|  0.7182099000000001|          1.4704351|  0.7182099000000001|          0.91243384|         0.30232317|-0.41186720000000004|         0.80198164|          0.65170806|        0.13954267|         0.08683018|          0.27700269|        0.13954267|-1.2090530000000002|          1.2040955|          0.91243384|          0.84440318|          1.0354242|          0.68719893|          0.40350268|         -0.83286848|         -1.1478214|         0.71358852|  -0.133655|         0.69814906|           0.487022|          0.91243384|         -0.21465652|         -1.1478214|        -1.0055874|1.2550521000000001|          1.0354242|         0.033119897|        0.13954267|         1.6233964|\n",
      "|  0|  6|  0|  5|  5|  3|  1|  1|  1|  5|   2|   4|   4|   0|   1|          0.72805326| -1.5809043999999999|          1.0675763|        -0.41639175|          -1.020573|          -1.126822|        -0.24250456|         -0.98784015|         1.0138609|       -0.66885209|-0.17688446|        -0.74192744|          -0.520946|        -0.29122291|         0.31535432|       -0.64926189|-0.5441649000000001|        -0.41639175|         -0.40594891|           1.087676|         -1.7480286|  -1.413741|        0.067717914|   1.2796215|         -1.1852143|       -0.26683532|        -0.24250456|         -0.98151252|         0.10325883|         0.60867978| 0.03050127|           1.1439427|        0.44689248|        -0.41639175|-0.9209433000000001|   4|   1|   6|   3|   3|   4|   6|   3|   0|   2|   4|   4|   1|   3|   5|         -0.98151252|         -0.54584744|        -0.83643609|         -0.54584744|           1.2974958|         0.60867978|         -0.48194693|          -1.126822|         0.079204526|        0.93998115|          1.4101713|         -0.48974917|        0.93998115|        0.067717914|        -0.29122291|           1.2974958|          0.80179333|          1.0675763|         -0.38775063|          0.97325545|         -0.98151252|         -1.1852143|         0.35571171|-0.83710486|        -0.41639175|        -0.37833571|           1.2974958|          -1.0692248|         -1.1852143|        -1.0601698|        0.34774379|          1.0675763| -1.5809043999999999|        0.93998115|         1.5863257|\n",
      "|  1|  6|  5|  3|  6|  6|  3|  1|  0|  1|   0|   5|   1|   2|   2|-0.09532461699999999|         -0.76238751|        -0.93105874|         -1.1431812|         0.19039043|         -1.6073692|          2.3596292|          -1.0198326|1.6345916999999999|        0.46610251| -1.2605055|         0.28672105|        -0.20957353|-1.7779823000000001|        -0.36281278|        0.75796617| 1.3959458999999999|         -1.1431812|-0.02836606600000...|0.15303060000000002|         0.73044405|-0.32174047|          1.6833744|  -1.3841643| 1.0313328000000002|1.0791620000000002|          2.3596292|          0.69322445|        -0.19589577|        -0.13714216| 0.82318748|-0.37438790000000005|          1.945612|         -1.1431812| 1.0168129000000001|   1|   1|   0|   0|   2|   1|   4|   1|   6|   4|   1|   5|   4|   0|   4|          0.69322445|          -1.2293249|        -0.59348227|          -1.2293249|         -0.65864894|        -0.13714216|          0.34436553|         -1.6073692|          0.31077049|         0.2356448|         0.52098111|         -0.64212255|         0.2356448|          1.6833744|-1.7779823000000001|         -0.65864894|-0.09942323900000001|        -0.93105874|          0.47169009|         -0.11006904|          0.69322445| 1.0313328000000002|-1.2114565000000002|-0.19702684|         -1.1431812|        -0.99732176|         -0.65864894| -0.9731721999999999| 1.0313328000000002|         -1.663666|       -0.86160023|        -0.93105874|         -0.76238751|         0.2356448|        -1.2451878|\n",
      "|  2|  0|  2|  4|  1|  2|  4|  3|  5|  1|   5|   5|   2|   4|   1|          0.23676952|          0.40540048|         -1.9286113|        -0.97178585|         0.69025445|        -0.92876141|          2.8966625|           1.4087912|        0.73166428|       -0.73704497| -1.2996768|         -1.3733457|        -0.11983198|-1.7995077000000002|        -0.67016869|        0.74233094|          2.0375683|        -0.97178585|         -0.17780984|          1.0597076|         0.61591946| 0.77459636|          1.8719035|   1.4208818|          2.1384719|      -0.093439759|          2.8966625|           1.5868349|        0.027375949|        -0.64900252| 0.29249653|        -0.059505725|         1.8655229|        -0.97178585|        -0.18879981|   5|   1|   5|   3|   2|   2|   3|   3|   5|   1|   2|   4|   2|   3|   1|           1.5868349|         -0.96572981|          1.5972913|         -0.96572981|          -1.8026957|        -0.64900252|          0.78372485|        -0.92876141|          0.20550481|       -0.49015846|        -0.54367709|          0.70589164|       -0.49015846|          1.8719035|-1.7995077000000002|          -1.8026957|          0.36340775|         -1.9286113|          0.24783576|         -0.91273078|           1.5868349|          2.1384719|        -0.51636119| 0.45119464|        -0.97178585|          1.1075334|          -1.8026957|          0.24978869|          2.1384719|1.5986653999999998|       -0.48966283|         -1.9286113|          0.40540048|       -0.49015846|       -0.18301955|\n",
      "|  2|  6|  5|  0|  1|  4|  3|  3|  3|  2|   1|   4|   1|   6|   1|          0.24166205|            0.377291|         -1.7363464|        -0.86625072|         -0.7017094|         -0.8212869| 2.5955462000000002|           -1.387398|         0.4791276|       -0.73093924| -1.1619801|         0.61675354| 0.5981188000000001|-1.6084353999999998|        -0.60285449|         1.4417868|          1.8291462|        -0.86625072|         -0.15830473|         0.35163167|         0.44535063| 0.70631115| 1.6752778999999998|  0.10057012| 1.9252991999999998|       -0.24007474| 2.5955462000000002|           1.4295775|         0.65483241|        -0.58656554|  1.2855257|          0.89429057|         1.6661258|        -0.86625072|          0.3898738|   3|   4|   5|   0|   5|   1|   1|   2|   3|   0|   4|   4|   3|   6|   4|           1.4295775|         -0.85965448|         0.45987269|         -0.85965448| -1.6257013999999999|        -0.58656554|          0.70603115|         -0.8212869|          0.41140677|       -0.44735975|        -0.49953958|          0.99453064|       -0.44735975| 1.6752778999999998|-1.6084353999999998| -1.6257013999999999|         -0.17387637|         -1.7363464|  1.6733063999999997|          -0.8259854|           1.4295775| 1.9252991999999998|-1.9163259999999998| 0.41154269|        -0.86625072|         -1.0228314| -1.6257013999999999|          0.86515791| 1.9252991999999998|       -0.50207232|       -0.60615023|         -1.7363464|            0.377291|       -0.44735975|        -1.3741836|\n",
      "|  0|  0|  4|  1|  4|  4|  1|  0|  4|  1|   5|   6|   0|   3|   3|           1.1326278|          -3.1736978|           2.149104|        -0.83205464|         -0.2609744|         -2.2577599|        -0.49697775|           -1.215916|        0.88997284|        0.15846115|-0.35028514|-1.0533573999999999|         0.16487867|        -0.57792801|          0.6351894|        -1.3243546|-1.0992153999999998|        -0.83205464|          -1.5571525|         0.83551665| 1.4418853999999999| -2.8395887|         0.12918319|  0.73562089|          -2.385908|       -0.90771808|        -0.49697775| -1.9751849999999997|        -0.60377175|          1.2237013|-0.32778011|  1.7410352999999998|        0.89005869|        -0.83205464|        -0.74620849|   1|   0|   3|   5|   1|   4|   6|   5|   0|   1|   3|   3|   0|   3|   4| -1.9751849999999997|          -1.0918424|          0.8641532|          -1.0918424|           2.6100105|          1.2237013|           -0.969881|         -2.2577599|           0.8823431|         1.8879229| 2.8315992999999997|         -0.98246635|         1.8879229|         0.12918319|        -0.57792801|           2.6100105|         0.072558716|           2.149104|          0.45721486|           1.9562037| -1.9751849999999997|          -2.385908|         0.17889718| -1.6813517|        -0.83205464|         0.43638202|           2.6100105|  1.9827328000000002|          -2.385908|        0.11757427|         1.3050527|           2.149104|          -3.1736978|         1.8879229|        -2.0808487|\n",
      "|  6|  5|  2|  5|  4|  2|  3|  0|  4|  5|   5|   6|   2|   0|   4|         -0.87680342|          -2.4880098| 1.6101906000000001|        -0.70090277|        -0.98289532|         -1.8248521|        -0.26183992|           1.2589558|        0.52604584|       -0.96629381|-0.33523244|        -0.83834004|         0.11044028|        -0.53754384|         0.47139186|         0.3510643|        -0.77617266|        -0.70090277|          0.35131108|         0.52031726|        -0.25565047| -2.2075124|         0.18643162|  0.21360048|         -1.7877326|        0.73892072|        -0.26183992|          -1.4882582|        -0.25529765|         0.93709835|-0.94279523|         -0.72169182|        0.78701264|        -0.70090277|         -0.0347636|   3|   3|   1|   5|   0|   2|   1|   0|   5|   4|   4|   6|   3|   1|   3|          -1.4882582|         -0.90578592|        -0.45627733|         -0.90578592|             1.97985|         0.93709835|         -0.73057929|         -1.8248521|          0.66898338|         1.4688021|          2.2116166|         -0.57093994|         1.4688021|         0.18643162|        -0.53754384|             1.97985|         -0.19291228| 1.6101906000000001|         -0.33450279|           1.5036677|          -1.4882582|         -1.7877326|-1.0612978000000002| -1.3074284|        -0.70090277|         0.64006235|             1.97985|         -0.61269837|         -1.7877326|         1.2732208|          0.818577| 1.6101906000000001|          -2.4880098|         1.4688021|       -0.29309702|\n",
      "|  0|  1|  6|  0|  5|  2|  6|  2|  1|  6|   1|   1|   1|   2|   2|         -0.55823621|         -0.44092367|         0.16120504|        -0.20513097|         0.16914407|         -0.4147565|         0.16624854|-0.06931578099999999|       -0.53501956|1.7723221000000002|-0.16031873|         0.14974096|          1.1771855|        -0.23587092|        0.039328534|        -1.5407966|       0.0048431897|        -0.20513097|          -1.1303451|        -0.87208553|        -0.32262078|-0.36029979|         0.17477845|  -1.9886002|        -0.17918769|        0.87167166|         0.16624854|         -0.16358168|         -2.1050311|         0.12909191|-0.78314735|  1.6336131999999999|        0.28802918|        -0.20513097|         0.25723979|   4|   4|   3|   3|   2|   2|   1|   1|   0|   0|   3|   3|   4|   3|   3|         -0.16358168|         -0.24347645|         -1.5866185|         -0.24347645| 0.24058579999999996|         0.12909191|        -0.079947243|         -0.4147565|         -0.80688186|        0.24161094|         0.37828277|          0.94004572|        0.24161094|         0.17477845|        -0.23587092| 0.24058579999999996|          0.75667351|         0.16120504|          0.58116709|          0.21672719|         -0.16358168|        -0.17918769|        -0.37231802|-0.21395638|        -0.20513097|        -0.51690598| 0.24058579999999996|            1.127243|        -0.17918769|        0.41399037|        0.19492327|         0.16120504|         -0.44092367|        0.24161094|        0.81618863|\n",
      "|  1|  0|  2|  3|  6|  6|  2|  2|  5|  2|   5|   1|   2|   4|   3|         -0.40280354|           1.0049541|         -1.2160002|       -0.085536888|-1.1600745000000001|         0.32088524| 1.0745641000000001|         -0.57938957|       -0.33238209|       -0.29081851|-0.32431635|         0.89626233|        -0.54016671|        -0.42345884|        -0.39182248|         1.1405324|         0.96224116|       -0.085536888|          0.62439542|        -0.28462522|         0.18527216|  1.0324958|         0.57043431|  0.45067192| 1.3491308000000002|       -0.74565759| 1.0745641000000001|           1.0574762|          0.9049489|        -0.54698636| 0.10074112|         -0.44851799|        0.35890125|       -0.085536888|         0.97145904|   4|   4|   3|   3|   3|   3|   2|   1|   0|   4|   2|   4|   6|   0|   3|           1.0574762|-0.01205730800000...|          1.3951427|-0.01205730800000...| -1.3021236999999999|        -0.54698636|          0.52072433|         0.32088524|          -1.3642696|       -0.67842204|        -0.95553995|         -0.33472164|       -0.67842204|         0.57043431|        -0.42345884| -1.3021236999999999|-0.00708392280000...|         -1.2160002|-0.06482679400000001|         -0.83402046|           1.0574762| 1.3491308000000002|         0.32535964| 0.60893949|       -0.085536888|           1.183537| -1.3021236999999999|         -0.79760648| 1.3491308000000002|        -1.1937777|        0.21948687|         -1.2160002|           1.0049541|       -0.67842204|     -0.0040512211|\n",
      "|  1|  4|  5|  4|  5|  4|  6|  6|  1|  4|   0|   5|   6|   0|   1|          -1.5799589|  3.4188112999999998|        -0.80139391| 1.8828808000000001|-1.2947667999999999|          3.5459828|         -2.0573397|         -0.89161875|       -0.82110785|         0.2900364|   1.607646|         0.27527316|        -0.96813056|          2.3368862|        -0.14521246|        -1.9094395|        -0.55201638| 1.8828808000000001|         -0.84623307|          1.5844399|          2.3831854|  2.6819836|         -1.8672783| -0.38546303|         0.89212038|        0.96407217|         -2.0573397|          0.90647887| 1.1900298999999999|        -0.86734057|   2.009768|          -1.7421141|         -2.770022| 1.8828808000000001|-1.4185031000000001|   5|   1|   0|   1|   2|   2|   3|   2|   0|   1|   3|   5|   4|   3|   1|          0.90647887|  2.1875587000000003| 1.6215468000000002|  2.1875587000000003| -1.4670031000000001|        -0.86734057|          0.44095953|          3.5459828|         -0.21673652|        -1.8058677|-2.8837622000000005| -0.6965395000000001|        -1.8058677|         -1.8672783|          2.3368862| -1.4670031000000001| -1.9763308000000002|        -0.80139391|         0.082522444| -1.5006991000000003|          0.90647887|         0.89212038| 1.2530847999999999|  1.5948518| 1.8828808000000001|          1.5025659| -1.4670031000000001|          0.35872633|         0.89212038|       -0.40563333|        0.14390621|        -0.80139391|  3.4188112999999998|        -1.8058677|        0.86573581|\n",
      "|  2|  1|  6|  4|  6|  6|  2|  2|  3|  6|   0|   2|   2|   4|   2|         -0.03471208|         -0.43559607|         -2.2448248|-1.7695381000000001|          0.6315964|          -2.178789|          4.2820306|         -0.48937072|        -0.9109608|         1.5767178| -2.1123861|         0.75215451|          1.1938935|-2.9557512999999997|        -0.81725437|       -0.84694076|          2.7621447|-1.7695381000000001|-0.00711324949999...|         0.41420197|         0.32821919| 0.24267845| 2.9173061000000002|   -1.256183| 2.4881102999999998|        0.72088014|          4.2820306|  1.7780223999999998|         -1.0295678|         -0.5885516| 0.89305612|         0.032302036|3.1611757999999996|-1.7695381000000001| 1.3555156000000002|   1|   0|   3|   6|   2|   2|   4|   2|   0|   5|   1|   3|   2|   0|   4|  1.7780223999999998| -1.8468494999999998|         0.35627448| -1.8468494999999998|          -1.8978253|         -0.5885516|          0.88003339|          -2.178789|         -0.20886285|       -0.12320881|         0.10922591|          0.62228119|       -0.12320881| 2.9173061000000002|-2.9557512999999997|          -1.8978253|           1.5664686|         -2.2448248|           1.6875467|         -0.74928884|  1.7780223999999998| 2.4881102999999998|         0.15661799| 0.13224973|-1.7695381000000001|         -1.6174573|          -1.8978253|0.039957901000000004| 2.4881102999999998|        0.78385227|        0.39047283|         -2.2448248|         -0.43559607|       -0.12320881|        0.13932998|\n",
      "|  2|  0|  5|  4|  4|  5|  2|  6|  3|  1|   0|   1|   1|   0|   3|           0.9860484|         -0.61792286|        -0.86991364|         -1.0016956|        -0.32659198|         -1.3876187| 2.1099607000000002|         -0.75062808|        0.44407583|       0.076669621| -1.1153514|        -0.45027945|        -0.74553386|         -1.5716337|        -0.33511571|         1.7189322| 1.2636498999999999|         -1.0016956|         -0.88545801|          1.9524978|        -0.28689236|-0.23206856|          1.4960036|  0.46728937|         0.96370474|        0.24203755| 2.1099607000000002|  0.6548748999999999|        -0.26336565|        -0.14549291|   1.262966|  1.3643298000000001|          1.714879|         -1.0016956|         0.17902518|   3|   0|   5|   3|   2|   2|   1|   2|   3|   5|   3|   4|   4|   0|   4|  0.6548748999999999| -1.0734066999999998|        -0.53209776| -1.0734066999999998|         -0.63624381|        -0.14549291|          0.32509864|         -1.3876187|         -0.70567208|         0.1736386|         0.40957782|          0.39341244|         0.1736386|          1.4960036|         -1.5716337|         -0.63624381|          0.66260649|        -0.86991364| -1.0005819999999999|          -0.1354091|  0.6548748999999999|         0.96370474|-1.4551969999999999|-0.14321489|         -1.0016956|        -0.27138577|         -0.63624381|          0.12930036|         0.96370474|        0.32571798|        -1.2420267|        -0.86991364|         -0.61792286|         0.1736386|       -0.69328112|\n",
      "|  3|  4|  2|  2|  6|  4|  4|  2|  3|  1|   2|   5|   2|   0|   3|         -0.32081045|         -0.40968493|         -0.7584732|        -0.78256389|         0.51658501|         -1.0537127|          1.7101628|0.056472352999999996|       -0.27137061|        0.66276248|-0.88717918|         -1.5183733|         0.20268606|         -1.2478016|        -0.28689352|        0.39954795|          1.0462249|        -0.78256389|         -0.11511564|          1.7836642|-1.9105653000000002|-0.10861393|          1.1993147|-0.059643842|         0.84038908|        0.06002103|          1.7101628|          0.58079504|         0.26600653|        -0.15058897|  1.5703236|         -0.72865492|         1.3544063|        -0.78256389|        -0.18677372|   4|   3|   0|   3|   1|   0|   4|   1|   0|   5|   3|   5|   4|   6|   1|          0.58079504|         -0.83308892|        -0.19678638|         -0.83308892|          -0.5832483|        -0.15058897|          0.28803061|         -1.0537127|          0.13026766|       0.087767977|         0.25155973|          0.37631441|       0.087767977|          1.1993147|         -1.2478016|          -0.5832483|          0.55321118|         -0.7584732|         -0.43293506|         -0.16259708|          0.58079504|         0.84038908|        -0.31930817|-0.06897865|        -0.78256389|        -0.43877022|          -0.5832483|         -0.19372184|         0.84038908|        0.14342342|        -1.3857412|         -0.7584732|         -0.40968493|       0.087767977|       -0.66119234|\n",
      "+---+---+---+---+---+---+---+---+---+---+----+----+----+----+----+--------------------+--------------------+-------------------+-------------------+-------------------+-------------------+-------------------+--------------------+------------------+------------------+-----------+-------------------+-------------------+-------------------+-------------------+------------------+-------------------+-------------------+--------------------+-------------------+-------------------+-----------+-------------------+------------+-------------------+------------------+-------------------+--------------------+-------------------+-------------------+-----------+--------------------+------------------+-------------------+-------------------+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--------------------+--------------------+-------------------+--------------------+--------------------+-------------------+--------------------+-------------------+--------------------+------------------+-------------------+--------------------+------------------+-------------------+-------------------+--------------------+--------------------+-------------------+--------------------+--------------------+--------------------+-------------------+-------------------+-----------+-------------------+-------------------+--------------------+--------------------+-------------------+------------------+------------------+-------------------+--------------------+------------------+------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "drop_list = ['# id','y']\n",
    "x = df.drop(*drop_list)\n",
    "x.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "variable-forwarding",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.ml import Pipeline\n",
    "from pyspark.ml.feature import BucketedRandomProjectionLSH, VectorAssembler\n",
    "import numpy as np\n",
    "from pyspark.sql import DataFrame, Window\n",
    "\n",
    "from sparksampling.core.mlsamplinglib.func import df_with_column_double\n",
    "import pyspark.sql.functions as F\n",
    "\n",
    "\n",
    "class SparkEditedNearestNeighbours(object):\n",
    "    '''\n",
    "    Edited Nearest Neighbours under-sampling for Spark DataFrames.\n",
    "\n",
    "    A row is kept when at most n_neighbors // 2 of its selected neighbours carry a\n",
    "    different label. Neighbours come from BucketedRandomProjectionLSH, so the\n",
    "    neighbour search is approximate.\n",
    "    '''\n",
    "\n",
    "    def __init__(self, n_neighbors, bucket_length=3, seed=None):\n",
    "        '''\n",
    "        * n_neighbors: number of nearest neighbours that vote on every row\n",
    "        * bucket_length: bucketLength handed to BucketedRandomProjectionLSH\n",
    "        * seed: seed for the LSH projection; a random one is drawn per call when None\n",
    "        '''\n",
    "        self.n_neighbors = n_neighbors\n",
    "        self.bucket_length = bucket_length\n",
    "        self.seed = seed\n",
    "\n",
    "    def fit_resample(self, x: DataFrame, y) -> DataFrame:\n",
    "        '''\n",
    "        inputs:\n",
    "        * x: spark df holding only the feature columns\n",
    "        * y: spark df whose first column is the label, rows aligned with x\n",
    "        output:\n",
    "        * spark df with the feature columns returned by vectorized_feature plus the\n",
    "          label column, restricted to the rows that survive the neighbour vote\n",
    "        '''\n",
    "        label_col = y.columns[0]\n",
    "\n",
    "        # NOTE(review): x and y are paired through monotonically_increasing_id(); the ids only\n",
    "        # line up when both frames share the same partitioning - confirm for new callers.\n",
    "        vectorized = self.vectorized_feature(x).withColumn(\"index\", F.monotonically_increasing_id())\n",
    "        y = y.withColumn(\"index\", F.monotonically_increasing_id())\n",
    "\n",
    "        seed = np.random.randint(1, 65535) if self.seed is None else self.seed\n",
    "        brp = BucketedRandomProjectionLSH(inputCol=\"features\", outputCol=\"hashes\", seed=seed,\n",
    "                                          bucketLength=self.bucket_length)\n",
    "        model = brp.fit(vectorized)\n",
    "\n",
    "        # here distance is calculated from brp's param inputCol\n",
    "        self_join_w_distance = model.approxSimilarityJoin(vectorized, vectorized, float(\"inf\"),\n",
    "                                                          distCol=\"EuclideanDistance\")\n",
    "\n",
    "        # remove self-comparison (distance 0) and keep only the columns the vote needs\n",
    "        pairs = self_join_w_distance.filter(F.col(\"EuclideanDistance\") > 0).select(\n",
    "            F.col(\"datasetA.index\").alias(\"index_a\"),\n",
    "            F.col(\"datasetB.index\").alias(\"index_b\"),\n",
    "            \"EuclideanDistance\")\n",
    "\n",
    "        # topK: the n_neighbors closest candidates of every row\n",
    "        nearest_first = Window.partitionBy(\"index_a\").orderBy(\"EuclideanDistance\")\n",
    "        neighbours = pairs.withColumn(\"r_num\", F.row_number().over(nearest_first)).filter(\n",
    "            F.col(\"r_num\") <= self.n_neighbors)\n",
    "\n",
    "        # vote: attach the row's own label and each neighbour's label under distinct names,\n",
    "        # so the two lookups into y cannot be confused with each other\n",
    "        true_labels = y.select(y[\"index\"].alias(\"index_a\"), y[label_col].alias(\"true_label\"))\n",
    "        neighbour_labels = y.select(y[\"index\"].alias(\"index_b\"), y[label_col].alias(\"pre_label\"))\n",
    "        votes = neighbours.join(true_labels, on=\"index_a\").join(neighbour_labels, on=\"index_b\")\n",
    "\n",
    "        # count disagreeing neighbours per row; rows whose neighbours all agree get 0 and are kept.\n",
    "        # Rows for which the LSH join produced no neighbour never reach this table and are dropped.\n",
    "        disagreement = (F.col(\"true_label\") != F.col(\"pre_label\")).cast(\"int\")\n",
    "        incorrect_count = votes.groupBy(\"index_a\").agg(F.sum(disagreement).alias(\"incorrect_count\"))\n",
    "        sample_index = incorrect_count.filter(F.col(\"incorrect_count\") <= self.n_neighbors // 2).select(\n",
    "            F.col(\"index_a\").alias(\"index\"))\n",
    "\n",
    "        # join on the shared index: a join without a key is a cross join\n",
    "        output = vectorized.drop(\"features\").join(sample_index, on=\"index\").join(y, on=\"index\")\n",
    "        return output.drop(\"index\")\n",
    "\n",
    "    def vectorized_feature(self, x: DataFrame) -> DataFrame:\n",
    "        '''\n",
    "        Pass x through df_with_column_double (project helper; presumably casts the columns\n",
    "        to double - confirm) and assemble all of its columns into a 'features' vector column.\n",
    "        The individual columns are kept next to 'features'.\n",
    "        '''\n",
    "        x = df_with_column_double(x)\n",
    "        assembler = VectorAssembler(inputCols=x.columns, outputCol='features')\n",
    "        return Pipeline(stages=[assembler]).fit(x).transform(x)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "narrative-monroe",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DataFrame[X_0: double, X_1: double, X_2: double, X_3: double, X_4: double, X_5: double, X_6: double, X_7: double, X_8: double, X_9: double, X_10: double, X_11: double, X_12: double, X_13: double, X_14: double, X_15: double, X_16: double, X_17: double, X_18: double, X_19: double, X_20: double, X_21: double, X_22: double, X_23: double, X_24: double, X_25: double, X_26: double, X_27: double, X_28: double, X_29: double, X_30: double, X_31: double, X_32: double, X_33: double, X_34: double, X_35: double, X_36: double, X_37: double, X_38: double, X_39: double, X_40: double, X_41: double, X_42: double, X_43: double, X_44: double, X_45: double, X_46: double, X_47: double, X_48: double, X_49: double, X_50: double, X_51: double, X_52: double, X_53: double, X_54: double, X_55: double, X_56: double, X_57: double, X_58: double, X_59: double, X_60: double, X_61: double, X_62: double, X_63: double, X_64: double, X_65: double, X_66: double, X_67: double, X_68: double, X_69: double, X_70: double, X_71: double, X_72: double, X_73: double, X_74: double, X_75: double, X_76: double, X_77: double, X_78: double, X_79: double, X_80: double, X_81: double, X_82: double, X_83: double, X_84: double, X_85: double, X_86: double, X_87: double, X_88: double, X_89: double, X_90: double, X_91: double, X_92: double, X_93: double, X_94: double, X_95: double, X_96: double, X_97: double, X_98: double, X_99: double, y: string]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pyspark.sql import SparkSession\n",
    "from sparksampling.config import SPARK_CONF\n",
    "\n",
    "# Session built from the shared project configuration.\n",
    "conf = SPARK_CONF\n",
    "spark = SparkSession.builder.config(conf=conf).getOrCreate()\n",
    "df = spark.read.csv(\"hdfs://localhost:9000/dataset/ten_million_top1k.csv\", header=True)\n",
    "\n",
    "# Split the frame into the label column and the feature columns.\n",
    "drop_list = ['# id', 'y']\n",
    "y = df.select('y')\n",
    "x = df.drop(*drop_list)\n",
    "\n",
    "# Resample with Edited Nearest Neighbours; `df` is rebound to the resampled frame.\n",
    "enn = SparkEditedNearestNeighbours(n_neighbors=3)\n",
    "df = enn.fit_resample(x, y)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "tender-episode",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---+---+---+---+---+---+---+---+---+---+----+----+----+----+----+-----------+----------+---------+-----------+-----------+-------------------+-----------+-----------+----------+----------+-----------+----------+----------+-----------+----------+----------+-------------------+-----------+----------+--------+------------------+----------+----------+----------+----------+---------+-----------+---------+----------+---------+-----------+-----------+---------+-----------+----------+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+---------+----------+----------+----------+------------------+---------+-------------------+-------------------+-----------+------------------+---------+-----------+------------------+----------+-----------+------------------+----------+---------+------------------+------------------+---------+----------+---------+----------+-----------+----------+------------------+----------+----------+---------+----------+---------+----------+------------------+----------+---+\n",
      "|X_0|X_1|X_2|X_3|X_4|X_5|X_6|X_7|X_8|X_9|X_10|X_11|X_12|X_13|X_14|       X_15|      X_16|     X_17|       X_18|       X_19|               X_20|       X_21|       X_22|      X_23|      X_24|       X_25|      X_26|      X_27|       X_28|      X_29|      X_30|               X_31|       X_32|      X_33|    X_34|              X_35|      X_36|      X_37|      X_38|      X_39|     X_40|       X_41|     X_42|      X_43|     X_44|       X_45|       X_46|     X_47|       X_48|      X_49|X_50|X_51|X_52|X_53|X_54|X_55|X_56|X_57|X_58|X_59|X_60|X_61|X_62|X_63|X_64|     X_65|      X_66|      X_67|      X_68|              X_69|     X_70|               X_71|               X_72|       X_73|              X_74|     X_75|       X_76|              X_77|      X_78|       X_79|              X_80|      X_81|     X_82|              X_83|              X_84|     X_85|      X_86|     X_87|      X_88|       X_89|      X_90|              X_91|      X_92|      X_93|     X_94|      X_95|     X_96|      X_97|              X_98|      X_99|  y|\n",
      "+---+---+---+---+---+---+---+---+---+---+----+----+----+----+----+-----------+----------+---------+-----------+-----------+-------------------+-----------+-----------+----------+----------+-----------+----------+----------+-----------+----------+----------+-------------------+-----------+----------+--------+------------------+----------+----------+----------+----------+---------+-----------+---------+----------+---------+-----------+-----------+---------+-----------+----------+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+---------+----------+----------+----------+------------------+---------+-------------------+-------------------+-----------+------------------+---------+-----------+------------------+----------+-----------+------------------+----------+---------+------------------+------------------+---------+----------+---------+----------+-----------+----------+------------------+----------+----------+---------+----------+---------+----------+------------------+----------+---+\n",
      "|6.0|5.0|4.0|5.0|2.0|2.0|3.0|3.0|6.0|5.0| 1.0| 1.0| 6.0| 3.0| 5.0|-0.99513124|-3.6292718|2.4204502|-0.97570679|-0.10658287|-2.6091914000000003|-0.50468426|-0.40461826|0.76365848|-0.9612525|-0.43076305|0.21113412|-1.6542911|-0.70296259|0.71313918|0.10048822|-1.2143941999999999|-0.97570679|0.57761198|1.844862|1.2231096000000001|-3.2379518|0.19014047|-1.9326177|-2.6872125|1.5403452|-0.50468426|-2.228743|0.40144393|1.3882938|-0.92881286|-0.51232563|1.0622768|-0.97570679|-1.5348242| 3.0| 1.0| 0.0| 5.0| 3.0| 2.0| 3.0| 6.0| 4.0| 4.0| 5.0| 3.0| 3.0| 4.0| 3.0|-2.228743|-1.2733956|0.52975239|-1.2733956|2.9516687000000004|1.3882938|-1.0942843999999998|-2.6091914000000003|0.039236337|2.1533357000000004|3.2339791|-0.29600692|2.1533357000000004|0.19014047|-0.70296259|2.9516687000000004|0.71730253|2.4204502|0.8193011999999998|2.2221227999999997|-2.228743|-2.6872125|-1.119919|-1.9173942|-0.97570679|-1.7131415|2.9516687000000004|-1.6082825|-2.6872125|-0.809006|0.67904304|2.4204502|-3.6292718|2.1533357000000004|0.21443366|  0|\n",
      "+---+---+---+---+---+---+---+---+---+---+----+----+----+----+----+-----------+----------+---------+-----------+-----------+-------------------+-----------+-----------+----------+----------+-----------+----------+----------+-----------+----------+----------+-------------------+-----------+----------+--------+------------------+----------+----------+----------+----------+---------+-----------+---------+----------+---------+-----------+-----------+---------+-----------+----------+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+---------+----------+----------+----------+------------------+---------+-------------------+-------------------+-----------+------------------+---------+-----------+------------------+----------+-----------+------------------+----------+---------+------------------+------------------+---------+----------+---------+----------+-----------+----------+------------------+----------+----------+---------+----------+---------+----------+------------------+----------+---+\n",
      "only showing top 1 row\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df.show(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "indonesian-departure",
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-4-3c9a60fd698f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcount\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m~/anaconda3/envs/sample/lib/python3.7/site-packages/pyspark/sql/dataframe.py\u001b[0m in \u001b[0;36mcount\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    583\u001b[0m         \u001b[0;36m2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    584\u001b[0m         \"\"\"\n\u001b[0;32m--> 585\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcount\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    586\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    587\u001b[0m     \u001b[0;34m@\u001b[0m\u001b[0mignore_unicode_prefix\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/envs/sample/lib/python3.7/site-packages/py4j/java_gateway.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m   1301\u001b[0m             \u001b[0mproto\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mEND_COMMAND_PART\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1302\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1303\u001b[0;31m         \u001b[0manswer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgateway_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1304\u001b[0m         return_value = get_return_value(\n\u001b[1;32m   1305\u001b[0m             answer, self.gateway_client, self.target_id, self.name)\n",
      "\u001b[0;32m~/anaconda3/envs/sample/lib/python3.7/site-packages/py4j/java_gateway.py\u001b[0m in \u001b[0;36msend_command\u001b[0;34m(self, command, retry, binary)\u001b[0m\n\u001b[1;32m   1031\u001b[0m         \u001b[0mconnection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_connection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1032\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1033\u001b[0;31m             \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconnection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1034\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mbinary\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1035\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_create_connection_guard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconnection\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/envs/sample/lib/python3.7/site-packages/py4j/java_gateway.py\u001b[0m in \u001b[0;36msend_command\u001b[0;34m(self, command)\u001b[0m\n\u001b[1;32m   1198\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1199\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1200\u001b[0;31m             \u001b[0manswer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msmart_decode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstream\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1201\u001b[0m             \u001b[0mlogger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdebug\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Answer received: {0}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0manswer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1202\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0manswer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mproto\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mRETURN_MESSAGE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/envs/sample/lib/python3.7/socket.py\u001b[0m in \u001b[0;36mreadinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m    587\u001b[0m         \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    588\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 589\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    590\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    591\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_timeout_occurred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "df.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "advance-drama",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "150\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "DataFrame[y: string]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from imblearn.over_sampling import SMOTE\n",
    "from pyspark.sql import SparkSession\n",
    "from sparksampling.config import SPARK_CONF\n",
    "from sparksampling.core.mlsamplinglib.smote import SparkSMOTE\n",
    "\n",
    "# Session built from the shared project configuration.\n",
    "conf = SPARK_CONF\n",
    "spark = SparkSession.builder.config(conf=conf).getOrCreate()\n",
    "indf = spark.read.csv(\"hdfs://localhost:9000/dataset/ten_million_top1k.csv\", header=True)\n",
    "\n",
    "# Small imbalanced sample: up to 100 rows with y == 1 followed by up to 50 rows with y == 0.\n",
    "df = (\n",
    "    indf.filter(indf['y'] == 1).limit(100)\n",
    "    .union(indf.filter(indf['y'] == 0).limit(50))\n",
    ")\n",
    "\n",
    "print(df.count())\n",
    "\n",
    "y = df[['y']]\n",
    "y\n",
    "\n",
    "# find min label\n",
    "\n",
    "\n",
    "# drop_list = ['# id', 'y']\n",
    "# x = df.drop(*drop_list)\n",
    "\n",
    "# x = x.toPandas()\n",
    "# y = y.toPandas()\n",
    "# smote = SparkSMOTE(k_neighbors=3)\n",
    "# x_fit, y_fit = smote.fit_resample(x.values, y.values)\n",
    "\n",
    "# # smote = SparkSMOTE(k_neighbors=3)\n",
    "# # result_df = smote.fit_resample(x, y)\n",
    "\n",
    "# result_df = pd.concat([pd.DataFrame(x_fit, columns=x.columns), pd.DataFrame(y_fit, columns=y.columns)], axis=1)\n",
    "# print(result_df.count())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "danish-syracuse",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'0': 50, '1': 0}"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "label_index = y.columns[0]\n",
    "\n",
    "# Rows per label, computed in one aggregation instead of one filter job per label.\n",
    "# Grouping on the column also avoids pasting label values into a SQL string,\n",
    "# which only parses when the labels happen to look like numbers.\n",
    "count_map = {row[label_index]: row['count'] for row in y.groupBy(label_index).count().collect()}\n",
    "labels = list(count_map)\n",
    "\n",
    "# Largest class and, for every label, how many rows it is short of that class.\n",
    "max_label = max(count_map, key=count_map.get)\n",
    "max_num = count_map[max_label]\n",
    "sample_labels = count_map.keys()\n",
    "t_map = {sample_label: max_num - count_map[sample_label] for sample_label in sample_labels}\n",
    "\n",
    "t_map"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "celtic-array",
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "from functools import reduce\n",
    "import pyspark.sql.functions as F\n",
    "from pyspark.sql import DataFrame\n",
    "from pyspark.sql.window import Window\n",
    "from pyspark.ml.linalg import VectorUDT\n",
    "from pyspark.ml.feature import StringIndexer, VectorAssembler, BucketedRandomProjectionLSH\n",
    "from pyspark.sql.functions import col\n",
    "from pyspark.ml import Pipeline\n",
    "from typing import Dict\n",
    "\n",
    "from sparksampling.core.mlsamplinglib.func import to_array, df_with_column_double, vectorized_feature\n",
    "\n",
    "\n",
    "def pre_smote_df_process(df: DataFrame, num_cols, cat_cols, target_col, require_indexing=True, index_suffix=\"_index\"):\n",
    "    '''\n",
    "    string indexer (optional) and vector assembler\n",
    "    inputs:\n",
    "    * df: spark df, original\n",
    "    * num_cols: numerical cols to be assembled (the list is not modified)\n",
    "    * cat_cols: categorical cols to be stringindexed (the list is not modified)\n",
    "    * target_col: prediction target, never assembled or indexed\n",
    "    * require_indexing: when true, string-index cat_cols and drop the raw cat cols\n",
    "    * index_suffix: will be the suffix after string indexing\n",
    "    output:\n",
    "    * vectorized: spark df, after stringindex and vector assemble, ready for smote;\n",
    "      the target is carried as the last column, named 'label'\n",
    "    * stages_: the pipeline stages that were applied, for testset transformation\n",
    "    raises:\n",
    "    * ValueError when target_col does not hold exactly 2 distinct values\n",
    "    '''\n",
    "    if df.select(target_col).distinct().count() != 2:\n",
    "        raise ValueError(\"Target col must have exactly 2 classes\")\n",
    "\n",
    "    # work on copies so the caller's lists are never mutated\n",
    "    num_cols = [c for c in num_cols if c != target_col]\n",
    "    cat_cols = list(cat_cols)\n",
    "\n",
    "    # only assembled numeric columns into features\n",
    "    stages_ = [VectorAssembler(inputCols=num_cols, outputCol='features')]\n",
    "\n",
    "    # setting to drop original num cols and cat cols\n",
    "    drop_cols = set(num_cols)\n",
    "\n",
    "    # index the string cols, except possibly for the label col\n",
    "    if require_indexing:\n",
    "        # sorted() gives the indexed columns a stable order from run to run\n",
    "        stages_ += [StringIndexer(inputCol=column, outputCol=column + index_suffix).fit(df)\n",
    "                    for column in sorted(set(cat_cols) - {target_col})]\n",
    "        # also drop cat cols if str index applied\n",
    "        drop_cols.update(cat_cols)\n",
    "\n",
    "    pos_vectorized = Pipeline(stages=stages_).fit(df).transform(df)\n",
    "\n",
    "    # attach the label before projecting, so it still resolves when target_col is one of the dropped cat cols\n",
    "    labelled = pos_vectorized.withColumn('label', pos_vectorized[target_col])\n",
    "    keep_cols = [a for a in pos_vectorized.columns if a not in drop_cols and a not in (target_col, 'label')]\n",
    "    vectorized = labelled.select(*keep_cols, 'label')\n",
    "\n",
    "    print(\"return num cols vectorized df and stages for testset transformation\")\n",
    "\n",
    "    return vectorized, stages_\n",
    "\n",
    "\n",
    "def smote(vectorized_sdf, smote_config):\n",
    "    '''\n",
    "    contains logic to perform smote oversampling, given a spark df with 2 classes\n",
    "    inputs:\n",
    "    * vectorized_sdf: cat cols are already stringindexed, num cols are assembled into 'features' vector\n",
    "      df target col should be 'label'; rows with label == 1 are used as the minority class\n",
    "    * smote_config: config obj containing smote parameters; the attributes read here are\n",
    "      seed and bucket_length (LSH), k (neighbours per row) and multiplier (number of synthetic batches)\n",
    "    output:\n",
    "    * oversampled_df: spark df after smote oversampling, i.e. the synthetic rows unioned with every\n",
    "      original row; vectorized_sdf unchanged when multiplier produces no batch\n",
    "    '''\n",
    "    dataInput_min = vectorized_sdf[vectorized_sdf['label'] == 1]\n",
    "\n",
    "    # LSH, bucketed random projection\n",
    "    brp = BucketedRandomProjectionLSH(inputCol=\"features\", outputCol=\"hashes\", seed=smote_config.seed,\n",
    "                                      bucketLength=smote_config.bucket_length)\n",
    "    # smote only applies on existing minority instances\n",
    "    model = brp.fit(dataInput_min)\n",
    "\n",
    "    # here distance is calculated from brp's param inputCol\n",
    "    self_join_w_distance = model.approxSimilarityJoin(dataInput_min, dataInput_min, float(\"inf\"),\n",
    "                                                      distCol=\"EuclideanDistance\")\n",
    "\n",
    "    # remove self-comparison (distance 0)\n",
    "    self_join_w_distance = self_join_w_distance.filter(self_join_w_distance.EuclideanDistance > 0)\n",
    "\n",
    "    # rank the candidates of every minority row by distance and keep the k closest\n",
    "    over_original_rows = Window.partitionBy(\"datasetA\").orderBy(\"EuclideanDistance\")\n",
    "    self_similarity_df = self_join_w_distance.withColumn(\"r_num\", F.row_number().over(over_original_rows))\n",
    "    self_similarity_df_selected = self_similarity_df.filter(self_similarity_df.r_num <= smote_config.k)\n",
    "\n",
    "    over_original_rows_no_order = Window.partitionBy('datasetA')\n",
    "\n",
    "    # synthetic point = row + gap * (neighbour - row) with gap in [0, 1], i.e. a point on the segment\n",
    "    # between the row and its neighbour; the difference therefore has to be neighbour minus row\n",
    "    subtract_vector_udf = F.udf(lambda arr: random.uniform(0, 1) * (arr[1] - arr[0]), VectorUDT())\n",
    "    add_vector_udf = F.udf(lambda arr: arr[0] + arr[1], VectorUDT())\n",
    "\n",
    "    # retain original columns; 'features' is rebuilt, every other column is copied over\n",
    "    original_cols = dataInput_min.columns\n",
    "    copied_cols = [c for c in original_cols if c != 'features']\n",
    "\n",
    "    # list to store batches of synthetic data\n",
    "    res = []\n",
    "\n",
    "    for i in range(smote_config.multiplier):\n",
    "        print(\"generating batch %s of synthetic instances\" % i)\n",
    "        # logic to randomly select neighbour: pick the largest random number generated row as the neighbour\n",
    "        df_random_sel = (self_similarity_df_selected\n",
    "                         .withColumn(\"rand\", F.rand())\n",
    "                         .withColumn('max_rand', F.max('rand').over(over_original_rows_no_order))\n",
    "                         .where(F.col('rand') == F.col('max_rand'))\n",
    "                         .drop('max_rand', 'rand', 'r_num'))\n",
    "        # create synthetic feature numerical part\n",
    "        df_vec_diff = df_random_sel.select(\n",
    "            '*', subtract_vector_udf(F.array('datasetA.features', 'datasetB.features')).alias('vec_diff'))\n",
    "        df_vec_modified = df_vec_diff.select(\n",
    "            '*', add_vector_udf(F.array('datasetA.features', 'vec_diff')).alias('features'))\n",
    "\n",
    "        # for categorical cols, either pick original or the neighbour's cat values;\n",
    "        # the side is drawn once per column on the driver, so it is the same for every row of the batch\n",
    "        for c in copied_cols:\n",
    "            col_sub = random.choice(['datasetA', 'datasetB'])\n",
    "            df_vec_modified = df_vec_modified.withColumn(c, F.col(\"{0}.{1}\".format(col_sub, c)))\n",
    "\n",
    "        # this df_vec_modified is the synthetic minority instances,\n",
    "        df_vec_modified = df_vec_modified.drop('datasetA', 'datasetB', 'vec_diff', 'EuclideanDistance')\n",
    "\n",
    "        res.append(df_vec_modified)\n",
    "\n",
    "    if not res:\n",
    "        # no batch was requested: reduce() cannot fold an empty list, and there is nothing to add\n",
    "        return vectorized_sdf\n",
    "\n",
    "    dfunion = reduce(DataFrame.union, res)\n",
    "    # union synthetic instances with original full (both minority and majority) df\n",
    "    oversampled_df = dfunion.union(vectorized_sdf.select(dfunion.columns))\n",
    "\n",
    "    return oversampled_df\n",
    "\n",
    "\n",
    "def restore_smoted_df(num_cols, smoted_df, vectorized_col):\n",
    "    '''\n",
    "    unpack the vectorized col of a smoted df back into one col per numerical feature\n",
    "    inputs:\n",
    "    * num_cols: names for the unpacked cols; position i receives element i of the vector,\n",
    "      so the order must be the order used when the vector was assembled\n",
    "    * smoted_df: spark df carrying vectorized_col\n",
    "    * vectorized_col: str, col that is vectorized\n",
    "    output:\n",
    "    * spark df with the num_cols restored, without vectorized_col and the helper array col\n",
    "    '''\n",
    "    # the vector is first turned into a plain array col via the to_array udf\n",
    "    unpacked = smoted_df.withColumn(\"array_num_cols\", to_array(col(vectorized_col)))\n",
    "\n",
    "    # one col per array position, named after the matching entry of num_cols\n",
    "    for position, name in enumerate(num_cols):\n",
    "        unpacked = unpacked.withColumn(name, col(\"array_num_cols\")[position])\n",
    "\n",
    "    return unpacked.drop(vectorized_col, 'array_num_cols')\n",
    "\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "class SparkSMOTE(object):\n",
    "    \"\"\"SMOTE-style oversampler for the minority class of a Spark DataFrame.\n",
    "\n",
    "    Candidate neighbours of each minority row are found with\n",
    "    BucketedRandomProjectionLSH on the 'features' vector column, and the\n",
    "    synthetic rows of every round are unioned with the original rows.\n",
    "\n",
    "    NOTE(review): 'features' is dropped from each synthetic round, so the\n",
    "    interpolated vector never reaches the result; the returned rows only carry\n",
    "    column values copied from the source row or from its neighbour.\n",
    "    Confirm whether the vector should be unpacked into columns instead.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, k_neighbors):\n",
    "        # number of closest minority neighbours kept as candidates for each row\n",
    "        self.k_neighbors = k_neighbors\n",
    "\n",
    "    def fit_resample(self, x: DataFrame, y: DataFrame) -> DataFrame:\n",
    "        \"\"\"Oversample the least frequent label and return the enlarged data set.\n",
    "\n",
    "        :param x: feature rows; vectorized_feature(x) must expose a 'features' column\n",
    "        :param y: labels of the rows of x, read from its first column\n",
    "        :return: every joined column except 'features': the synthetic rows of all\n",
    "                 rounds unioned with the original rows, or the original rows alone\n",
    "                 when no synthetic round is due\n",
    "        \"\"\"\n",
    "        vectorized = vectorized_feature(x)\n",
    "        # NOTE(review): x and y are paired on ids generated independently for each\n",
    "        # frame; this presumably relies on both sharing row order and partitioning.\n",
    "        vectorized = vectorized.withColumn(\"index\", F.monotonically_increasing_id())\n",
    "        min_label, t_count = self.get_sample_label_and_t_round(y)\n",
    "        y = y.withColumn(\"index\", F.monotonically_increasing_id())\n",
    "        label_index = self.get_label_index(y)\n",
    "        vectorized_sdf = vectorized.join(y, vectorized.index == y.index).drop(\"index\")\n",
    "\n",
    "        if t_count < 1:\n",
    "            # the majority is less than twice the minority: no round is due, and\n",
    "            # reduce() over an empty list of rounds would raise TypeError\n",
    "            output_cols = [c for c in vectorized_sdf.columns if c != 'features']\n",
    "            return vectorized_sdf.select(output_cols)\n",
    "\n",
    "        # compare as a Column so labels that are not numeric literals match as well\n",
    "        df_min = vectorized_sdf.filter(vectorized_sdf[label_index] == min_label)\n",
    "\n",
    "        brp = BucketedRandomProjectionLSH(inputCol=\"features\", outputCol=\"hashes\", seed=np.random.randint(1, 65535),\n",
    "                                          bucketLength=3)\n",
    "        # smote only applies on existing minority instances\n",
    "        model = brp.fit(df_min)\n",
    "\n",
    "        # here distance is calculated from brp's param inputCol\n",
    "        self_join_w_distance = model.approxSimilarityJoin(df_min, df_min, float(\"inf\"),\n",
    "                                                          distCol=\"EuclideanDistance\")\n",
    "\n",
    "        # remove self-comparison (distance 0)\n",
    "        self_join_w_distance = self_join_w_distance.filter(self_join_w_distance.EuclideanDistance > 0)\n",
    "        # rank the neighbours of every minority row by distance and keep the closest k\n",
    "        over_original_rows = Window.partitionBy(\"datasetA\").orderBy(\"EuclideanDistance\")\n",
    "        self_similarity_df = self_join_w_distance.withColumn(\"r_num\", F.row_number().over(over_original_rows))\n",
    "        self_similarity_df_selected = self_similarity_df.filter(self_similarity_df.r_num <= self.k_neighbors)\n",
    "\n",
    "        over_original_rows_no_order = Window.partitionBy('datasetA')\n",
    "        original_cols = df_min.columns\n",
    "        # SMOTE step: x_new = x + u * (x_neighbour - x) with u drawn from [0, 1],\n",
    "        # so the offset must point from the row (arr[0]) towards its neighbour (arr[1])\n",
    "        scaled_diff_udf = F.udf(lambda arr: random.uniform(0, 1) * (arr[1] - arr[0]), VectorUDT())\n",
    "        add_vector_udf = F.udf(lambda arr: arr[0] + arr[1], VectorUDT())\n",
    "        res = []\n",
    "        for _ in range(t_count):\n",
    "            # keep one random neighbour per minority row: the pair with the largest draw\n",
    "            df_random_sel = (\n",
    "                self_similarity_df_selected\n",
    "                .withColumn(\"rand\", F.rand())\n",
    "                .withColumn('max_rand', F.max('rand').over(over_original_rows_no_order))\n",
    "                .where(F.col('rand') == F.col('max_rand'))\n",
    "                .drop(*['max_rand', 'rand', 'r_num'])\n",
    "            )\n",
    "            df_vec_diff = df_random_sel.select(\n",
    "                '*', scaled_diff_udf(F.array('datasetA.features', 'datasetB.features')).alias('vec_diff'))\n",
    "            df_vec_modified = df_vec_diff.select(\n",
    "                '*', add_vector_udf(F.array('datasetA.features', 'vec_diff')).alias('features'))\n",
    "\n",
    "            for c in original_cols:\n",
    "                # randomly select neighbour or original data; the draw happens on the\n",
    "                # driver, so one choice per column is shared by all rows of the round\n",
    "                col_sub = random.choice(['datasetA', 'datasetB'])\n",
    "                val = \"{0}.{1}\".format(col_sub, c)\n",
    "                if c != 'features':\n",
    "                    # do not unpack original numerical features\n",
    "                    df_vec_modified = df_vec_modified.withColumn(c, F.col(val))\n",
    "\n",
    "            # this df_vec_modified is the synthetic minority instances,\n",
    "            df_vec_modified = df_vec_modified.drop(\n",
    "                *['features', 'datasetA', 'datasetB', 'vec_diff', 'EuclideanDistance'])\n",
    "\n",
    "            res.append(df_vec_modified)\n",
    "        dfunion = reduce(DataFrame.union, res)\n",
    "        oversampled_df = dfunion.union(vectorized_sdf.select(dfunion.columns))\n",
    "        return oversampled_df\n",
    "\n",
    "    def get_sample_label_and_t_round(self, y: DataFrame) -> (str, int):\n",
    "        \"\"\"Return the least frequent label and the number of synthetic rounds.\n",
    "\n",
    "        The number of rounds is (largest class size - smallest class size) //\n",
    "        smallest class size, computed on the first column of y.\n",
    "\n",
    "        :raises ValueError: when y holds no non-null label\n",
    "        \"\"\"\n",
    "        label_index = y.columns[0]\n",
    "        # one aggregation job instead of one filter + count job per label;\n",
    "        # null labels are skipped because they cannot be matched by equality\n",
    "        count_map = {row[0]: row[1] for row in y.groupBy(label_index).count().collect() if row[0] is not None}\n",
    "        if not count_map:\n",
    "            raise ValueError('y holds no label, cannot determine the minority class')\n",
    "        min_label = min(count_map, key=count_map.get)\n",
    "        min_num = count_map[min_label]\n",
    "        max_num = max(count_map.values())\n",
    "        t_count = (max_num - min_num) // min_num\n",
    "        print(f\"run tcount:{t_count}\")\n",
    "        return min_label, t_count\n",
    "\n",
    "    def get_label_index(self, y: DataFrame):\n",
    "        \"\"\"Return the name of the label column, i.e. the first column of y.\"\"\"\n",
    "        return y.columns[0]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "democratic-society",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "250\n",
      "run tcount:3\n",
      "400\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from imblearn.over_sampling import SMOTE\n",
    "from pyspark.sql import SparkSession\n",
    "from sparksampling.config import SPARK_CONF\n",
    "\n",
    "# Tunable inputs of this experiment, gathered in one place.\n",
    "DATASET_PATH = \"hdfs://localhost:9000/dataset/ten_million_top1k.csv\"\n",
    "N_MAJORITY_ROWS = 100  # rows kept with y == 1\n",
    "N_MINORITY_ROWS = 10  # rows kept with y == 0\n",
    "K_NEIGHBORS = 3  # neighbours considered per minority row\n",
    "\n",
    "conf = SPARK_CONF\n",
    "spark = SparkSession.builder.config(conf=conf).getOrCreate()\n",
    "indf = spark.read.csv(DATASET_PATH, header=True)\n",
    "\n",
    "# build a deliberately imbalanced sample so that oversampling has work to do\n",
    "df = indf.filter(indf.y == 1).limit(N_MAJORITY_ROWS).union(indf.filter(indf.y == 0).limit(N_MINORITY_ROWS))\n",
    "\n",
    "# row count before oversampling\n",
    "print(df.count())\n",
    "\n",
    "# split into the label frame and the feature frame expected by fit_resample\n",
    "y = df.select('y')\n",
    "drop_list = ['# id', 'y']\n",
    "x = df.drop(*drop_list)\n",
    "\n",
    "# single-machine reference run with imbalanced-learn, kept for comparison:\n",
    "# x = x.toPandas()\n",
    "# y = y.toPandas()\n",
    "# smote = SMOTE(k_neighbors=3)\n",
    "# x_fit, y_fit = smote.fit_resample(x.values, y.values)\n",
    "\n",
    "smote = SparkSMOTE(k_neighbors=K_NEIGHBORS)\n",
    "result_df = smote.fit_resample(x, y)\n",
    "\n",
    "# result_df = pd.concat([pd.DataFrame(x_fit, columns=x.columns), pd.DataFrame(y_fit, columns=y.columns)], axis=1)\n",
    "# row count after oversampling\n",
    "print(result_df.count())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "known-frederick",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------+---+---+---+---+---+---+---+---+---+---+----+----+----+----+----+-----------+--------------------+------------------+-----------+-----------+-------------------+--------------------+-------------------+-------------------+-------------------+------------------+-------------------+------------------+------------------+-------------------+------------------+-------------------+-----------+--------------------+-------------------+-------------------+--------------------+--------------------+--------------------+-------------------+-------------------+--------------------+--------------------+--------------------+--------------------+--------------------+------------------+-------------------+-----------+-------------------+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+-------------------+------------------+-------------------+-------------------+------------------+-----------+--------------------+-------------------+-------------------+------------------+------------------+------------------+--------------------+--------------------+------------------+------------------+------------------+--------------------+-------------------+-------------------+-------------------+-------------------+-------------------+-------------------+-----------+-------------------+------------------+------------------+-------------------+-----------+-------------------+------------------+-------------------+------------------+------------------+---+\n",
      "|            features|X_0|X_1|X_2|X_3|X_4|X_5|X_6|X_7|X_8|X_9|X_10|X_11|X_12|X_13|X_14|       X_15|                X_16|              X_17|       X_18|       X_19|               X_20|                X_21|               X_22|               X_23|               X_24|              X_25|               X_26|              X_27|              X_28|               X_29|              X_30|               X_31|       X_32|                X_33|               X_34|               X_35|                X_36|                X_37|                X_38|               X_39|               X_40|                X_41|                X_42|                X_43|                X_44|                X_45|              X_46|               X_47|       X_48|               X_49|X_50|X_51|X_52|X_53|X_54|X_55|X_56|X_57|X_58|X_59|X_60|X_61|X_62|X_63|X_64|               X_65|              X_66|               X_67|               X_68|              X_69|       X_70|                X_71|               X_72|               X_73|              X_74|              X_75|              X_76|                X_77|                X_78|              X_79|              X_80|              X_81|                X_82|               X_83|               X_84|               X_85|               X_86|               X_87|               X_88|       X_89|               X_90|              X_91|              X_92|               X_93|       X_94|               X_95|              X_96|               X_97|              X_98|              X_99|  y|\n",
      "+--------------------+---+---+---+---+---+---+---+---+---+---+----+----+----+----+----+-----------+--------------------+------------------+-----------+-----------+-------------------+--------------------+-------------------+-------------------+-------------------+------------------+-------------------+------------------+------------------+-------------------+------------------+-------------------+-----------+--------------------+-------------------+-------------------+--------------------+--------------------+--------------------+-------------------+-------------------+--------------------+--------------------+--------------------+--------------------+--------------------+------------------+-------------------+-----------+-------------------+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+-------------------+------------------+-------------------+-------------------+------------------+-----------+--------------------+-------------------+-------------------+------------------+------------------+------------------+--------------------+--------------------+------------------+------------------+------------------+--------------------+-------------------+-------------------+-------------------+-------------------+-------------------+-------------------+-----------+-------------------+------------------+------------------+-------------------+-----------+-------------------+------------------+-------------------+------------------+------------------+---+\n",
      "|[-1.9474191489429...|4.0|2.0|0.0|4.0|4.0|3.0|3.0|3.0|6.0|0.0| 2.0| 2.0| 4.0| 1.0| 4.0| 0.76313938|          0.96998781|        0.65232108|  1.1075617|  0.5610991|         0.48232071|          -2.0904775|        0.021990818|         0.95239153|          -1.176407|        0.76976473|        -0.70358114|       -0.44425798|         1.0626648|         0.27206411|       -0.36598654|          -1.165582|  1.1075617|          0.65561273|         0.60920458|-2.8626357000000002|         -0.56166941| -1.5340943999999999|          0.63743619|-1.3665513999999999|         0.10591126|          -2.0904775| -1.0209488000000002|         -0.75183377|          0.01594428|         -0.32089286|       -0.82744497|         -1.8385179|  1.1075617|        -0.87505944| 3.0| 5.0| 1.0| 1.0| 0.0| 3.0| 6.0| 0.0| 4.0| 3.0| 3.0| 2.0| 3.0| 0.0| 3.0|        -0.45255478|         1.2084329|-1.5241122999999999|         0.54390757|         1.1721897| 0.43159717|         -0.22580987|         0.48232071|        -0.48015287|       -0.37993689|       -0.72140342|2.5982093999999996|          0.35849345| -1.5340943999999999|         1.0626648|        0.36519687|        0.90368525|  1.2323540000000002|        -0.49419257|       -0.073256574|        -0.45255478|        -0.72210293|          1.9169317|         0.32675604|  1.1075617|          1.0151651|        0.36519687|        -1.6695482|        -0.72210293|-0.65623288|          -1.185088|        0.65232108|        -0.35092328|       -0.37993689|        0.77312691|  0|\n",
      "|[-0.0587229365380...|1.0|0.0|2.0|1.0|6.0|6.0|1.0|3.0|6.0|0.0| 5.0| 6.0| 6.0| 1.0| 3.0| 0.43857708|          -1.5942791|         1.0482359|-0.43840642| 0.55602709|         0.68569007|         -0.19595925|         0.44851295|        -0.98162248|        -0.12777098|        0.88973496|-1.0067443999999999|        0.67512936|          1.234219|         0.30791875|         0.5797576|-0.5162270999999999|-0.43840642|           1.1366207|        -0.15582088|        -0.69935631|         -0.45453498|          0.10068223|-0.00288990150000...|-1.3903583000000002| 1.1345496000000002|         -0.19595925| -1.0266165999999999|         -0.43253976|          0.60537831|        -0.040519753|       -0.97945465|         0.48462236|-0.43840642|          2.1872039| 1.0| 5.0| 6.0| 3.0| 1.0| 2.0| 1.0| 3.0| 1.0| 3.0| 2.0| 6.0| 1.0| 0.0| 5.0|        -0.96692539|       -0.56942266|         0.97141117|         0.68178198|         1.1571725| 0.40954515|         -0.47470629|         0.68569007|         0.90883306|        0.94366253|         1.4189803|       -0.93090388|          0.28536383|          0.10068223|          1.234219|         1.2832703|        -2.0040812|           1.2539805|           -0.78452|         0.97011964|        -0.96692539|-1.1637883999999998|         0.24394932|        -0.84013161|-0.43840642|       -0.047662341|         1.2832703|         1.3270662|-1.1637883999999998|   1.847955|         0.23272163|         1.0482359|        -0.19596123|        0.94366253|        0.55413142|  0|\n",
      "|[-1.6095024326050...|6.0|2.0|0.0|4.0|4.0|2.0|6.0|3.0|6.0|0.0| 5.0| 2.0| 4.0| 2.0| 1.0|-0.16656857|           1.0077745|        0.52507662|  1.0512124|  2.4257922|          1.2309041|          -1.9104393|         0.37677234|        -0.61661435|         0.87617751|        0.94202771|         0.59368846|        0.30718367|1.3301321000000002|         0.22830084|2.3046450000000003|         -1.0358993|  1.0512124|         -0.80046135|        -0.91404927|          -1.895963|          0.28547551| -1.4195752000000001|          0.88307318|        -0.72700356|          1.0274308|          -1.9104393|         -0.48258383|         -0.13750778|        -0.028905008|           1.1217928|       -0.33013971|         -1.7274767|  1.0512124|        -0.51982974| 3.0| 5.0| 0.0| 2.0| 0.0| 1.0| 1.0| 0.0| 4.0| 3.0| 3.0| 3.0| 1.0| 1.0| 3.0|        -0.34702053|         1.1535095|          1.4746455|         0.93100776|        0.44650815|0.081833766|         -0.17371026|          1.2309041|       -0.076009164|       -0.41771786|       -0.76630083|        0.41054772|         -0.20594382| -1.4195752000000001|1.3301321000000002|         0.2438216|        0.76861888|          0.65639777|         0.44005374|        -0.13728486|        -0.34702053|        -0.58100048|          1.0748479|         0.36130458|  1.0512124|          1.2232214|         0.2438216|       -0.24503792|        -0.58100048|   2.328492|       -0.094382757|        0.52507662|         0.61825443|       -0.41771786|       -0.99283735|  0|\n",
      "|[0.53652518180577...|4.0|0.0|0.0|4.0|2.0|4.0|3.0|0.0|4.0|1.0| 2.0| 2.0| 4.0| 0.0| 3.0| 0.95821827|         -0.96754361|1.5229888999999999| 0.31193896| 0.54920887|          1.0516067|          -1.6379172| 0.6177236999999999|        -0.63063208|        -0.49263487|        0.92355815|0.07353356400000001|       0.042546433|1.2968703000000001|         0.50267673|       -0.31404764|         -1.3304399| 0.31193896|          0.82177432|         0.39495707|-1.1052783000000002|0.044257474000000005|         -0.95135467|         -0.65157324|        -0.94171624|         0.33119959|          -1.6379172|         -0.65885025|          0.73789218|           0.6315476|           1.7696506|       -0.75878699|        -0.76703547| 0.31193896|         0.24960985| 4.0| 0.0| 0.0| 1.0| 0.0| 1.0| 2.0| 5.0| 5.0| 2.0| 4.0| 2.0| 6.0| 3.0| 1.0|         -1.3023124|        0.24697206|-1.4569239999999999|         0.84847915|         0.6770472| 0.18833476| -0.6418583999999999|          1.0516067|         -1.9529339|0.7061940999999999|        0.95872247|       -0.57830973|-0.04576903099999...|         -0.95135467|1.2968703000000001|         1.5665532|         -1.304913|          0.84981928|         0.68103489|          0.9441339|         -1.3023124|-1.6894136999999998|         -0.5793121|        -0.63661973| 0.31193896| 2.1599652000000003|         1.5665532|       -0.12767143|-1.6894136999999998| 0.56427227|         0.26779767|1.5229888999999999|         0.35237825|0.7061940999999999|        0.17470135|  0|\n",
      "|[-3.2218828949057...|6.0|0.0|0.0|4.0|5.0|6.0|3.0|5.0|1.0|0.0| 2.0| 4.0| 2.0| 1.0| 3.0| 0.21333899|  1.0878735000000002|        0.52216505|  1.1056659|-0.61941908|          1.4525852| -1.9858133000000002|          1.9865745| 0.8356005999999999|        -0.22995377|          1.074019|         0.51527064|         -1.380583|         1.5187805|         0.23054801|        -1.6434161|-1.0670278999999998|  1.1056659|         -0.48751133|         -0.3992445|          0.2197837|          0.40020757|           -1.481435|          0.49045209|        -0.75624469|         0.58462228| -1.9858133000000002|         -0.49129243|          0.10456839|-0.04450051400000...|          0.28373108|       -0.19323306|-1.8113578000000001|  1.1056659|         0.56313865| 1.0| 3.0| 6.0| 5.0| 3.0| 4.0| 6.0| 0.0| 1.0| 4.0| 5.0| 6.0| 1.0| 0.0| 5.0|        -0.33858206|         1.2153614|         0.46733423| 1.0818413999999998|        0.43317249|0.059011552|         -0.16970747|          1.4525852|        -0.59752593|       -0.45763925|       -0.83211893|       -0.80979438|-0.28432209999999997|           -1.481435|         1.5187805|        0.22354322|         1.2291282|          0.68293812|       -0.016883483|        -0.16608728|        -0.33858206|        -0.57768593|          -0.703504|         0.39640272|  1.1056659| 1.4724758999999998|        0.22354322|        0.94690378|        -0.57768593| 0.96984901|         0.67467429|        0.52216505|         0.78536502|       -0.45763925|1.1784568000000002|  0|\n",
      "|[4.0,2.0,4.386694...|4.0|2.0|0.0|4.0|5.0|3.0|3.0|3.0|6.0|0.0| 2.0| 4.0| 4.0| 1.0| 4.0| 0.76313938|         -0.13061351|1.0905936999999999| 0.61891687|  0.5610991|         0.48232071|          -1.7369641|         0.81972983|         0.95239153|          1.0654774|        0.76976473|        -0.04395598|       -0.37998291|         1.0626648|         0.38301177|       -0.60381731|         -1.1946504| 0.61891687|  1.6340270000000001|         0.60920458|       -0.089754359|         -0.56166941|           -1.138793|         -0.96423197|-1.3665513999999999|-0.6512425000000001|          -1.7369641| -1.0209488000000002|         -0.75183377|          0.34886264|         -0.32089286|       -0.82744497|-1.1624983999999998| 0.61891687|        0.033060647| 3.0| 5.0| 1.0| 1.0| 0.0| 2.0| 1.0| 5.0| 4.0| 1.0| 2.0| 2.0| 3.0| 0.0| 3.0|        -0.88982716|        0.62466146|-1.5241122999999999|         0.54390757|         1.1721897| 0.43159717|         -0.43968301|         0.48232071|        -0.48015287|        0.22855651|        0.22678614|2.5982093999999996|          0.35849345|           -1.138793|         1.0626648|        0.99760502|        0.95861242|  1.2323540000000002|         0.39432878|         0.48210206|        -0.88982716|         -1.2091591|         0.29982415|        -0.21243522| 0.61891687|          1.0151651|        0.99760502|        0.71824689|         -1.2091591|-0.65623288|        -0.22165825|1.0905936999999999|        -0.35092328|        0.22855651|        0.53441502|  0|\n",
      "|[0.0,6.5023406060...|0.0|5.0|0.0|1.0|5.0|3.0|2.0|0.0|2.0|5.0| 1.0| 4.0| 4.0| 2.0| 1.0| 0.72019061| -1.5809043999999999|         1.0675763|-0.41639175|-0.99340385|-1.3170819999999999|         -0.24250456|        -0.98784015|       -0.033878285|        -0.66885209|       -0.19315662|        -0.74192744|         -0.520946|       -0.32202237|         0.31535432|       -0.64926189|-0.5441649000000001|-0.41639175|         -0.40594891|         -1.4064762|         -1.7480286|             -1.6753|         0.067717914|           1.2796215|-1.4220363999999999|        -0.26683532|         -0.24250456| -1.1754632999999999|          -1.1981965|          0.60867978|            1.354457|         2.2726616|         0.44689248|-0.41639175|-0.9209433000000001| 4.0| 5.0| 3.0| 1.0| 0.0| 4.0| 6.0| 3.0| 4.0| 2.0| 4.0| 4.0| 1.0| 1.0| 4.0|        -0.98151252|       -0.54584744|        -0.28898054|-0.6319348000000001|1.5504106000000002| 0.72500982|         -0.48194693|-1.3170819999999999|          -1.561793|        0.93998115|         1.4101713|      -0.049859957|  1.1135959999999998|         0.067717914|       -0.32202237|         1.2974958|        0.80179333|           1.2809208|        -0.38775063|         0.97325545|        -0.98151252|         -1.1852143|         0.35571171|        -0.83710486|-0.41639175|         0.64872859|         1.2974958|        -1.0692248|         -1.1852143|  1.0061232|         0.34774379|         1.0675763|-1.8679618999999998|        0.93998115|         1.5863257|  0|\n",
      "|[4.0,0.7499275568...|4.0|5.0|0.0|4.0|6.0|2.0|1.0|5.0|2.0|1.0| 5.0| 2.0| 0.0| 3.0| 1.0|-0.78270729|           -2.094262|         1.4517764|-0.52714197|-0.39833988|          -1.733856|         -0.38553849|        -0.18841312|         -1.0016032|         0.43391649|       -0.24527171|           2.284367|       -0.16116223|       -0.41174948|         0.43112298|        0.20041119|        -0.76391617|-0.52714197|          -2.5352593|        -0.48513889|         0.49483163|          -2.2205694|0.046858289000000004|         -0.42726161|-1.8963396000000001|        0.032733206|         -0.38553849|           -1.566125|          0.05305785|          0.81751221| -3.0657476000000004|        0.46156843|         0.54709936|-0.52714197|         -2.5233465| 4.0| 1.0| 0.0| 3.0| 1.0| 1.0| 2.0| 0.0| 1.0| 3.0| 2.0| 3.0| 3.0| 0.0| 4.0|         -1.3305146|       -0.69801946|-0.5803208000000001|        -0.82787622|         2.0634376| 0.96341206|          -0.6534188|          -1.733856|         -2.6739718|         1.2508656|1.8722169999999998|       -0.71007047|            1.475853|0.046858289000000004|       -0.41174948|         1.7521633|          1.142451|  1.7081747999999999|         0.84521305|          1.3043354|         -1.3305146|         -1.6116899|        -0.33274461|         -1.1142975|-0.52714197|        -0.28905795|         1.7521633|       -0.39097099|         -1.6116899|  1.4955921|          0.3404007|         1.4517764|         -2.4723905|         1.2508656|1.1214783000000002|  0|\n",
      "|[0.0,4.8987654822...|0.0|6.0|0.0|5.0|5.0|3.0|1.0|1.0|1.0|5.0| 2.0| 6.0| 6.0| 0.0| 1.0| 0.72805326| -1.6814988999999998|         1.0356811|-0.50794988|  -1.020573|          -1.126822|        -0.086949927|        -0.14039511|          1.0138609|        -0.90300849|       -0.17688446|             1.0902|        0.62227148|       -0.29122291|         0.29987197|       -0.95600543|        -0.46429518|-0.50794988|  1.3928341999999998|           1.087676|          2.2766634|           -1.413741|          0.18599414|         -0.88514471|         -1.1852143|         0.96312516|        -0.086949927|         -0.98151252|          0.10325883|          0.61767631|          0.03050127|         1.1439427|         0.59477654|-0.50794988| 1.3485129999999999| 4.0| 1.0| 6.0| 3.0| 3.0| 3.0| 6.0| 6.0| 0.0| 2.0| 3.0| 4.0| 1.0| 3.0| 5.0|        -0.96342684|        -0.6472802|        -0.83643609|        -0.54584744|         1.2974958| 0.60867978|         -0.47279196|          -1.126822|        0.079204526|        0.98476568|         1.4889195|       -0.48974917|          0.93998115|          0.18599414|       -0.29122291|         1.2913829|       -0.83932137|           1.0675763|         -1.1871952|         0.99518094|        -0.96342684|         -1.1499648|-1.6055898000000002|-0.8761023000000001|-0.50794988|        -0.37833571|         1.2913829|        -1.1312062|         -1.1499648| -1.0601698|         0.75527909|         1.0356811|-1.5809043999999999|        0.98476568|1.4558043999999999|  0|\n",
      "|[1.80386089263758...|0.0|5.0|3.0|6.0|2.0|4.0|1.0|3.0|2.0|0.0| 1.0| 2.0| 2.0| 0.0| 3.0| 0.53304849|         -0.64539181|        0.39889316| -0.1940624| 0.74365328|        -0.95431305|-0.03573456399999...|        -0.24911862|          1.8186014|-1.9996228999999999|       -0.20970714|         -1.5108161|        0.85818082|       -0.32759685|         0.11558763|         2.0656449|        -0.17978678| -0.1940624|           1.3134665|-1.2797063999999998|         0.58781475|          -1.0966018|         0.069814135|         -0.53942085|        -0.84203544|         -0.3996151|-0.03573456399999...|         -0.70687282| 0.08580463699999999|          0.23748678|  1.5729313999999999|         1.4146444|         0.22663704| -0.1940624|         -2.0162269| 0.0| 1.0| 3.0| 3.0| 3.0| 4.0| 2.0| 5.0| 1.0| 2.0| 5.0| 3.0| 1.0| 4.0| 1.0|        -0.37089421|       -0.24751746|        -0.23435268|        -0.48906031|        0.94975439| 0.45575793|         -0.18201669|        -0.95431305|        -0.75863992|        0.37817967|         0.5716278|       -0.61087169|          0.73040942|         0.069814135|       -0.32759685|        0.49688244|         1.6440749|          0.75833528|-1.6478738000000002|0.38252220000000003|        -0.37089421|        -0.44290717|-1.1551768999999998|        -0.33646208| -0.1940624| 1.1376908000000001|        0.49688244|         1.2996586|        -0.44290717|  1.1152408|         -1.5829357|        0.39889316|-1.2501871999999998|        0.37817967|        0.96274156|  0|\n",
      "|[-0.7545297188407...|1.0|0.0|3.0|4.0|2.0|3.0|3.0|5.0|6.0|5.0| 1.0| 2.0| 6.0| 0.0| 1.0| 0.67458771|         0.033119897|         1.0354242| 0.69814906| 0.25624292|          1.1804611|          -1.8067369|        0.011908246|           1.179175|        -0.78835878|        0.91774017|         -0.2123678|        -2.0392121|         1.2949723|         0.37007852|         1.0429217|-1.2018311000000002| 0.69814906|          -1.2195288|        -0.57629801|         0.50769932|          0.24970721| -1.2090530000000002|        -0.062359222|        -0.73586561|        -0.57771916|          -1.8067369|         -0.49253443|           -1.499683|          0.30232317|          0.16025105|         1.6495519|         -1.2750708| 0.69814906|        -0.88010037| 4.0| 5.0| 6.0| 1.0| 0.0| 2.0| 2.0| 3.0| 0.0| 4.0| 3.0| 6.0| 6.0| 4.0| 3.0|        -0.83286848|0.7182099000000001|         -0.4447192|         0.89925488|        0.46384707| 0.09275786|-0.41186720000000004|          1.1804611|         0.86669611|        0.13954267|        0.08683018|       -0.27792786|         -0.18180916| -1.2090530000000002|         1.2949723|        0.91243384|        0.84440318|          0.66434628|         0.68719893|         0.40350268|        -0.83286848|         -1.1478214|         0.71358852|          -0.133655| 0.69814906|         -1.1466114|        0.91243384|       -0.21465652|         -1.1478214| -1.4248979| 1.2550521000000001|         1.0354242|         0.57172005|        0.13954267|         1.6233964|  0|\n",
      "|[-2.3824865802355...|4.0|2.0|4.0|4.0|6.0|6.0|6.0|5.0|2.0|0.0| 1.0| 2.0| 2.0| 0.0| 5.0| 0.68573503|-0.09970291199999999|        0.98502995| 0.57186076|  1.0765441|         0.64451211| -1.5871627000000001|-1.7257403999999998|           1.135538|         0.57247962|        0.80011548|          2.8336841|        -1.3966087|         1.1111921|0.34668720000000003|        -1.8843186|-1.0868754999999999| 0.57186076|          0.69173137|          1.3979294| 0.5986300999999999|         -0.36640145|          -1.0434301|          -1.9608567|         -1.2091591|         -0.8585043| -1.5871627000000001|         -0.88982716|         -0.38952911|          0.31173554|          0.78598681|        0.51849118|         -1.0698997| 0.57186076|-1.4999676000000002| 3.0| 5.0| 1.0| 2.0| 2.0| 1.0| 6.0| 5.0| 5.0| 2.0| 5.0| 2.0| 0.0| 4.0| 3.0|        -0.80230775|        0.57874753|          1.3540763|         0.62466146|        0.99760502| 0.34886264|         -0.39647598|         0.64451211|         0.74979722|        0.19742867|        0.18989659|       -0.76003811|          0.22855651|          -1.0434301|         1.1111921|         0.8970071|        0.58293109|  1.0905936999999999|         0.55483748|         0.42913433|        -0.80230775|          -1.092099| 1.4093403999999998|        -0.18396281| 0.57186076|-1.2809218999999998|         0.8970071|       -0.75964445|          -1.092099|-0.75475553|        -0.38415834|        0.98502995|        -0.13061351|        0.19742867|      -0.078188374|  0|\n",
      "|[-1.5144610329295...|4.0|2.0|0.0|6.0|5.0|3.0|2.0|0.0|6.0|0.0| 1.0| 4.0| 6.0| 0.0| 4.0|0.040566009|          0.57172005|        0.66434628| 0.83521042| 0.74857368|        -0.37775115|          -1.7115042|         -1.1814147|        -0.64561173|         0.45104207|       -0.07458617|        -0.65013969|2.1406205000000003|        -0.1182903|         0.26001768|         0.6855573|         -1.0079926| 0.83521042|         -0.21709496|          2.1580376|          1.9597927|         -0.44823506| -1.2237166000000002|          0.90805041|        -0.35604994|        -0.32424005|          -1.7115042|         -0.29729512|          0.33619602|          0.09275786|        -0.090110623|        -2.0450636|         -1.4185115| 0.83521042|          1.9083833| 0.0| 0.0| 6.0| 1.0| 1.0| 1.0| 3.0| 5.0| 0.0| 4.0| 3.0| 4.0| 6.0| 1.0| 4.0|        -0.49253443|        0.89925488|        -0.64710018|        -0.18982197|        0.39691335| 0.18880537|         -0.24473451|        -0.37775115|        -0.45675356|       -0.18180916|        -0.3944129|         0.1807417|          0.29835607| -1.2237166000000002|        -0.1182903|        0.46384707|1.3983446000000002|          0.32067862|        -0.34225032|          0.0689721|        -0.49253443|        -0.73586561|        -0.92783295|          0.1525918| 0.83521042|          1.3484671|        0.46384707|         1.1366076|        -0.73586561|-0.22244224|        -0.21299776|        0.66434628|        -0.50734031|       -0.18180916|       -0.82863208|  0|\n",
      "|[6.04733700911641...|0.0|5.0|3.0|6.0|2.0|4.0|1.0|3.0|2.0|0.0| 1.0| 6.0| 2.0| 0.0| 3.0| 0.53304849|          -1.3200329|        0.86601554|-0.36423309| 0.74365328|        -0.95431305|         -0.15898948|           1.494116|          1.8186014|         -1.0759878|       -0.20970714|         0.60374344|       -0.67153774|       -0.32759685|         0.25427289|       -0.18361762|        -0.42524256|-0.36423309|-0.03209193299999...|-1.2797063999999998|         0.14487062|          -1.0966018|         0.085536585|         -0.95606563|        -0.84203544|        -0.36746842|         -0.15898948|         -0.70687282| 0.08580463699999999|          0.50067466|  1.5729313999999999|         1.4146444|         0.40353625|-0.36423309|         -1.1566347| 0.0| 1.0| 3.0| 3.0| 3.0| 4.0| 2.0| 3.0| 1.0| 1.0| 2.0| 3.0| 1.0| 4.0| 1.0|-0.7990598000000001|       -0.47274327|        -0.23435268|        -0.48906031|        0.94975439| 0.45575793|         -0.39228831|        -0.95431305|        -0.75863992|        0.78104806|         1.1746793|       -0.61087169|          0.73040942|         0.085536585|       -0.32759685|1.0608322000000001|        0.44266564|          0.75833528|          2.2438306|          0.8024777|-0.7990598000000001|        -0.96148404|        -0.24356024|         -0.6953409|-0.36423309| 1.1376908000000001|1.0608322000000001|         -1.199539|        -0.96148404|  1.1152408|         0.72308551|        0.86601554|-1.2501871999999998|        0.78104806|        0.15986617|  0|\n",
      "|[6.56290267717840...|4.0|2.0|0.0|4.0|2.0|2.0|6.0|1.0|4.0|5.0| 1.0| 4.0| 6.0| 3.0| 3.0|-0.30927601|-0.20494339999999997|         1.2169845|  0.6490009| 0.16966973|0.42079750000000005|          -1.8788773|         0.28001268|-1.0192386999999998|        -0.81826024|0.6852883000000001|        -0.23378224|       -0.36919739|        0.94564683|         0.42497279|        0.76972216|         -1.3076287|  0.6490009|           -1.646134|         0.69923568|        -0.68079818|         -0.51307692| -1.2225991000000003|          0.22089363|         -1.2292601|       4.1128269E-4|          -1.8788773|         -0.91919087|-0.10848189999999999|          0.40017704|          0.53660116|1.6278648999999998|           -1.23266|  0.6490009|-2.9198459999999997| 1.0| 1.0| 0.0| 1.0| 2.0| 3.0| 6.0| 3.0| 4.0| 1.0| 5.0| 2.0| 1.0| 0.0| 4.0|        -0.99745076|        0.64990851|        -0.29631502|         0.48065837|         1.0568001| 0.39021838|         -0.49273835|0.42079750000000005|        -0.15708374|        0.28422216|        0.30147138|        0.77034377|          0.32779692| -1.2225991000000003|        0.94564683|         1.1262935|         0.2107574|           1.1085343|         0.81221637|         0.55839631|        -0.99745076|         -1.3493547|        -0.51058201|        -0.26268454|  0.6490009|          2.2247792|         1.1262935|        0.36342571|         -1.3493547|-0.49124877|-1.7565912000000001|         1.2169845|        -0.32646284|        0.28422216|        -1.4634459|  0|\n",
      "|[0.0,6.0,0.0,-0.1...|0.0|6.0|0.0|5.0|6.0|3.0|1.0|1.0|1.0|5.0| 2.0| 2.0| 0.0| 0.0| 1.0| 0.72805326|          -1.2928536|        0.76323339|-0.41210134|  -1.020573|          -1.126822|-0.01020895400000...|         0.97545072|          1.0138609|          1.2157335|       -0.17688446| 1.7399078999999997|       -0.64042011|       -0.29122291|         0.21878589|        0.26219349|        -0.31905235|-0.41210134|           1.6222237|           1.087676|         -1.4631227|           -1.413741|          0.18076045|         -0.90167717|         -1.1852143|-2.2437370999999997|-0.01020895400000...|         -0.98151252|          0.10325883|          0.46506221|          0.03050127|         1.1439427|         0.49687649|-0.41210134|-1.2679146000000001| 4.0| 1.0| 6.0| 3.0| 3.0| 4.0| 2.0| 0.0| 0.0| 1.0| 5.0| 3.0| 1.0| 3.0| 5.0|-0.7140683000000001|       -0.51977059|        -0.83643609|        -0.54584744|         1.2974958| 0.60867978|         -0.35032327|          -1.126822|        0.079204526|        0.75217826|1.1411468999999999|       -0.48974917|          0.93998115|          0.18076045|       -0.29122291|        0.96352936|       -0.94827461|           1.0675763|        -0.66183362|         0.75191207|-0.7140683000000001|        -0.84751168|         -1.0713986|        -0.66888172|-0.41210134|        -0.37833571|        0.96352936|        0.97092669|        -0.84751168| -1.0601698|          1.2130611|        0.76323339|-1.5809043999999999|        0.75217826|          1.376485|  0|\n",
      "|[1.0,0.0427561952...|1.0|6.0|4.0|4.0|4.0|3.0|1.0|3.0|4.0|5.0| 4.0| 4.0| 0.0| 1.0| 4.0|  1.3094091| -3.3618474999999997|         2.4020252|-0.79957742|  1.7035627|         -2.6455175|         -0.74142442|          1.5910463|        -0.24836328|          -1.616449|       -0.43179555|-1.9040631000000001|        0.38227585|       -0.70603966|         0.71754214|        0.14981603|-1.3083391000000002|-0.79957742|          0.13670157|        -0.63272869|        -0.32124925| -3.2913782999999994|-0.00645190209999...|-0.03931060899999...|         -2.7380292|        -0.86224586|         -0.74142442| -2.2700807999999997|          0.47861943|           1.3336329|          -1.0406753|        0.79127329|         0.79264051|-0.79957742|         0.23319034| 0.0| 0.0| 0.0| 2.0| 3.0| 1.0| 2.0| 5.0| 4.0| 3.0| 2.0| 4.0| 0.0| 1.0| 3.0|         -2.1935471|        -1.0727079|         0.49013353|         -1.2889049|3.0051207000000004|  1.4125732|          -1.0774482|         -2.6455175|-0.5601685000000001|          2.018741|         3.0132765|       -0.83706863|           2.1887577|-0.00645190209999...|       -0.70603966|          2.876234|        0.35470382|           2.4662328|-1.5709031999999998|          2.1224729|         -2.1935471|          -2.666497|         0.67311167|-1.7989689999999998|-0.79957742| 2.0014762999999998|          2.876234|       -0.28273106|          -2.666497|-0.69212212|         -2.0765278|         2.4020252|-3.6871507000000006|          2.018741|       -0.59008452|  0|\n",
      "|[4.93510846628099...|1.0|0.0|0.0|4.0|5.0|6.0|2.0|1.0|4.0|0.0| 4.0| 2.0| 6.0| 1.0| 3.0| -1.2521145|         -0.39986789|         1.2844685|  0.5558513|   -0.35994|        -0.22312673| -1.7989042000000002|         0.53842444| 1.2508518999999998|        -0.61961522|       -0.12700653|         -1.2890722|       -0.27849837|       -0.18197844|          0.4410125|       -0.87673613|           -1.30115|  0.5558513|          0.92356788|         0.56426482|         0.49510831|         -0.12530748| -1.1410151000000002|          0.36393755|        0.013649456|-1.8759168000000002| -1.7989042000000002|-0.00337182359999...|          0.72300704|          0.45611981|-0.03701582299999...|       0.090573139|         -1.1008022|  0.5558513|        -0.15620909| 3.0| 1.0| 0.0| 2.0| 1.0| 2.0| 3.0| 0.0| 5.0| 3.0| 2.0| 4.0| 6.0| 3.0| 3.0|         -1.0667161|        0.53974503|         0.33999239|        -0.14920416|       0.027972788|0.028800445|         -0.52657298|        -0.22312673|         0.54497985|        0.39046475|        0.46828738|        -1.3734171|         0.085297475| -1.1410151000000002|       -0.18197844|         1.2292929|        0.87882658|-0.01248541399999...|         -1.0878546|         0.65269203|         -1.0667161|-1.4243778999999999|        -0.67024069|        -0.35673029|  0.5558513|           1.062714|         1.2292929|      -0.092464829|-1.4243778999999999| 0.63998034|        -0.33352429|         1.2844685|        -0.17685524|        0.39046475|        0.32172875|  0|\n",
      "|[-3.8832882517811...|6.0|6.0|3.0|1.0|4.0|6.0|1.0|3.0|6.0|2.0| 4.0| 6.0| 0.0| 3.0| 3.0|   0.653943|          -3.1736978|          2.149104|-0.83205464| 0.37370196|         -1.3019876|         -0.49697775|          -1.215916|          -1.010729|         0.15846115|       -0.22567857|-1.0533573999999999|        0.16487867|       -0.36527701|          0.6351894|        -1.3243546|-1.0992153999999998|-0.83205464|          -1.5571525|         -1.3221825| 1.4418853999999999|          -1.5977046|          0.12918319|          0.73562089|-1.3119566999999999|        -0.90771808|         -0.49697775|          -1.0898697|            -0.57408|           1.2237013|         -0.62172731|       -0.27945335|         0.89005869|-0.83205464|        -0.74620849| 1.0| 1.0| 6.0| 3.0| 2.0| 4.0| 6.0| 5.0| 0.0| 1.0| 3.0| 3.0| 3.0| 1.0| 3.0|-1.9751849999999997|        -1.0918424|        -0.50828508|        -0.64022054|1.4461818000000002| 0.68206122|           -0.969881|         -1.3019876|        -0.43387666|         1.8879229|2.8315992999999997|         2.1479885|           1.0627555|          0.12918319|       -0.36527701|         2.6100105|       0.072558716|           1.1816947|         0.45721486|          1.9562037|-1.9751849999999997|          -2.385908|         0.17889718|         -1.6813517|-0.83205464|         -1.7747131|         2.6100105|1.9827328000000002|          -2.385908| 0.88601971|          1.3050527|          2.149104|         -1.7951249|         1.8879229|        -2.0808487|  0|\n",
      "|[0.64240413536513...|4.0|2.0|0.0|4.0|5.0|2.0|6.0|1.0|4.0|5.0| 1.0| 4.0| 2.0| 3.0| 3.0|-0.30927601|       -0.0040263569|         1.2189872| 0.79165508| 0.16966973|0.42079750000000005| -2.0838818999999997|        -0.45677553|-1.0192386999999998|         0.79011877|0.6852883000000001|        -0.22141107|        0.46546669|        0.94564683|         0.43392341|      -0.064597481|         -1.3963838| 0.79165508|         -0.98082457|         0.69923568|        -0.78463809|         -0.51307692|          -1.3883899|  1.2641448999999998|         -1.2292601|        -0.97599026| -2.0838818999999997|         -0.91919087|-0.10848189999999999|          0.36382982|          0.53660116|1.6278648999999998|         -1.4541989| 0.79165508|         -1.2737239| 1.0| 1.0| 0.0| 1.0| 2.0| 3.0| 3.0| 0.0| 4.0| 2.0| 3.0| 4.0| 1.0| 0.0| 4.0|        -0.98379217|        0.81127331|        -0.29631502|         0.48065837|         1.0568001| 0.39021838|         -0.48641017|0.42079750000000005|        -0.15708374|        0.18548592|        0.13739981|        0.77034377|          0.32779692|          -1.3883899|        0.94564683|         1.0836944|       -0.88082235|           1.1085343|         0.55073041|         0.48987876|        -0.98379217|-1.3513571000000002|        -0.50126828|        -0.17597598| 0.79165508|          2.2247792|         1.0836944|         1.4124671|-1.3513571000000002|-0.49124877|          1.6852159|         1.2189872|        -0.32646284|        0.18548592|        -2.8630655|  0|\n",
      "+--------------------+---+---+---+---+---+---+---+---+---+---+----+----+----+----+----+-----------+--------------------+------------------+-----------+-----------+-------------------+--------------------+-------------------+-------------------+-------------------+------------------+-------------------+------------------+------------------+-------------------+------------------+-------------------+-----------+--------------------+-------------------+-------------------+--------------------+--------------------+--------------------+-------------------+-------------------+--------------------+--------------------+--------------------+--------------------+--------------------+------------------+-------------------+-----------+-------------------+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+-------------------+------------------+-------------------+-------------------+------------------+-----------+--------------------+-------------------+-------------------+------------------+------------------+------------------+--------------------+--------------------+------------------+------------------+------------------+--------------------+-------------------+-------------------+-------------------+-------------------+-------------------+-------------------+-----------+-------------------+------------------+------------------+-------------------+-----------+-------------------+------------------+-------------------+------------------+------------------+---+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "result_df.show()\n",
    "\n",
    "# Count rows per class label in result_df.\n",
    "# The label set comes from `y` (defined in an earlier cell), so a label that\n",
    "# does not occur in result_df is still reported, with a count of 0.\n",
    "label_index = 'y'\n",
    "labels = y.distinct().toPandas().to_numpy().reshape(-1)\n",
    "\n",
    "# A single groupBy aggregation replaces one filter().count() Spark job per\n",
    "# label and avoids building a SQL filter string out of label values.\n",
    "sampled_counts = {\n",
    "    str(row[label_index]): row['count']\n",
    "    for row in result_df.groupBy(label_index).count().collect()\n",
    "}\n",
    "# Keys are matched by their string form so that e.g. an int label 1 taken\n",
    "# from `y` still lines up with a string label '1' stored in result_df.\n",
    "count_map = {label: sampled_counts.get(str(label), 0) for label in labels}\n",
    "print(count_map)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "interesting-oracle",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert result_df to a pandas DataFrame (bound to `rpd`) for local inspection.\n",
    "rpd = result_df.toPandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "id": "norwegian-offense",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>X_0</th>\n",
       "      <th>X_1</th>\n",
       "      <th>X_2</th>\n",
       "      <th>X_3</th>\n",
       "      <th>X_4</th>\n",
       "      <th>X_5</th>\n",
       "      <th>X_6</th>\n",
       "      <th>X_7</th>\n",
       "      <th>X_8</th>\n",
       "      <th>X_9</th>\n",
       "      <th>...</th>\n",
       "      <th>X_91</th>\n",
       "      <th>X_92</th>\n",
       "      <th>X_93</th>\n",
       "      <th>X_94</th>\n",
       "      <th>X_95</th>\n",
       "      <th>X_96</th>\n",
       "      <th>X_97</th>\n",
       "      <th>X_98</th>\n",
       "      <th>X_99</th>\n",
       "      <th>y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>150</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-3.521899</td>\n",
       "      <td>-0.443056</td>\n",
       "      <td>3.490336</td>\n",
       "      <td>-2.552780</td>\n",
       "      <td>0.408416</td>\n",
       "      <td>-3.145233</td>\n",
       "      <td>3.296138</td>\n",
       "      <td>-2.098230</td>\n",
       "      <td>1.660156</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>151</th>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.013623</td>\n",
       "      <td>0.078930</td>\n",
       "      <td>-0.182991</td>\n",
       "      <td>1.205845</td>\n",
       "      <td>1.054715</td>\n",
       "      <td>0.165779</td>\n",
       "      <td>0.728376</td>\n",
       "      <td>-0.334082</td>\n",
       "      <td>0.218552</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>152</th>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.658649</td>\n",
       "      <td>-0.973172</td>\n",
       "      <td>1.031333</td>\n",
       "      <td>-1.663666</td>\n",
       "      <td>-0.861600</td>\n",
       "      <td>-0.931059</td>\n",
       "      <td>-0.762388</td>\n",
       "      <td>0.235645</td>\n",
       "      <td>-1.245188</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>153</th>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-1.802696</td>\n",
       "      <td>0.249789</td>\n",
       "      <td>2.138472</td>\n",
       "      <td>1.598665</td>\n",
       "      <td>-0.489663</td>\n",
       "      <td>-1.928611</td>\n",
       "      <td>0.405400</td>\n",
       "      <td>-0.490158</td>\n",
       "      <td>-0.183020</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>154</th>\n",
       "      <td>2.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-1.625701</td>\n",
       "      <td>0.865158</td>\n",
       "      <td>1.925299</td>\n",
       "      <td>-0.502072</td>\n",
       "      <td>-0.606150</td>\n",
       "      <td>-1.736346</td>\n",
       "      <td>0.377291</td>\n",
       "      <td>-0.447360</td>\n",
       "      <td>-1.374184</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>345</th>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.760588</td>\n",
       "      <td>0.669095</td>\n",
       "      <td>0.313475</td>\n",
       "      <td>-0.948553</td>\n",
       "      <td>-0.986428</td>\n",
       "      <td>-0.280506</td>\n",
       "      <td>2.315400</td>\n",
       "      <td>-1.183553</td>\n",
       "      <td>1.200655</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>346</th>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.257651</td>\n",
       "      <td>0.886359</td>\n",
       "      <td>-0.596238</td>\n",
       "      <td>0.868482</td>\n",
       "      <td>-1.651875</td>\n",
       "      <td>0.538814</td>\n",
       "      <td>1.000409</td>\n",
       "      <td>-0.412019</td>\n",
       "      <td>-0.885969</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>347</th>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-1.246718</td>\n",
       "      <td>1.459281</td>\n",
       "      <td>1.144018</td>\n",
       "      <td>-1.851572</td>\n",
       "      <td>0.840346</td>\n",
       "      <td>-1.030494</td>\n",
       "      <td>1.500145</td>\n",
       "      <td>-0.894590</td>\n",
       "      <td>0.852191</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>348</th>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-1.262651</td>\n",
       "      <td>0.740005</td>\n",
       "      <td>1.001378</td>\n",
       "      <td>-0.259923</td>\n",
       "      <td>-1.812421</td>\n",
       "      <td>-0.901247</td>\n",
       "      <td>2.092060</td>\n",
       "      <td>-1.166906</td>\n",
       "      <td>-1.562078</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>349</th>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.624356</td>\n",
       "      <td>0.958149</td>\n",
       "      <td>0.875527</td>\n",
       "      <td>0.216877</td>\n",
       "      <td>0.218380</td>\n",
       "      <td>-0.790111</td>\n",
       "      <td>-0.350810</td>\n",
       "      <td>0.053984</td>\n",
       "      <td>0.023443</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>200 rows × 101 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     X_0  X_1  X_2  X_3  X_4  X_5  X_6  X_7  X_8  X_9  ...      X_91  \\\n",
       "150  1.0  1.0  0.0  0.0  4.0  4.0  3.0  1.0  5.0  1.0  ... -3.521899   \n",
       "151  2.0  4.0  2.0  0.0  4.0  5.0  4.0  6.0  1.0  6.0  ... -0.013623   \n",
       "152  1.0  6.0  5.0  3.0  6.0  6.0  3.0  1.0  0.0  1.0  ... -0.658649   \n",
       "153  2.0  0.0  2.0  4.0  1.0  2.0  4.0  3.0  5.0  1.0  ... -1.802696   \n",
       "154  2.0  6.0  5.0  0.0  1.0  4.0  3.0  3.0  3.0  2.0  ... -1.625701   \n",
       "..   ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...       ...   \n",
       "345  3.0  0.0  5.0  3.0  6.0  5.0  3.0  1.0  0.0  4.0  ... -0.760588   \n",
       "346  0.0  6.0  5.0  3.0  6.0  6.0  2.0  6.0  3.0  4.0  ...  0.257651   \n",
       "347  0.0  6.0  0.0  3.0  4.0  4.0  6.0  2.0  3.0  4.0  ... -1.246718   \n",
       "348  0.0  4.0  5.0  2.0  1.0  2.0  6.0  3.0  1.0  4.0  ... -1.262651   \n",
       "349  2.0  4.0  6.0  3.0  5.0  5.0  4.0  2.0  3.0  4.0  ... -0.624356   \n",
       "\n",
       "         X_92      X_93      X_94      X_95      X_96      X_97      X_98  \\\n",
       "150 -0.443056  3.490336 -2.552780  0.408416 -3.145233  3.296138 -2.098230   \n",
       "151  0.078930 -0.182991  1.205845  1.054715  0.165779  0.728376 -0.334082   \n",
       "152 -0.973172  1.031333 -1.663666 -0.861600 -0.931059 -0.762388  0.235645   \n",
       "153  0.249789  2.138472  1.598665 -0.489663 -1.928611  0.405400 -0.490158   \n",
       "154  0.865158  1.925299 -0.502072 -0.606150 -1.736346  0.377291 -0.447360   \n",
       "..        ...       ...       ...       ...       ...       ...       ...   \n",
       "345  0.669095  0.313475 -0.948553 -0.986428 -0.280506  2.315400 -1.183553   \n",
       "346  0.886359 -0.596238  0.868482 -1.651875  0.538814  1.000409 -0.412019   \n",
       "347  1.459281  1.144018 -1.851572  0.840346 -1.030494  1.500145 -0.894590   \n",
       "348  0.740005  1.001378 -0.259923 -1.812421 -0.901247  2.092060 -1.166906   \n",
       "349  0.958149  0.875527  0.216877  0.218380 -0.790111 -0.350810  0.053984   \n",
       "\n",
       "         X_99  y  \n",
       "150  1.660156  1  \n",
       "151  0.218552  1  \n",
       "152 -1.245188  1  \n",
       "153 -0.183020  1  \n",
       "154 -1.374184  1  \n",
       "..        ... ..  \n",
       "345  1.200655  1  \n",
       "346 -0.885969  1  \n",
       "347  0.852191  1  \n",
       "348 -1.562078  1  \n",
       "349  0.023443  1  \n",
       "\n",
       "[200 rows x 101 columns]"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Select the rows whose label column `y` equals the string \"1\".\n",
    "rpd.loc[rpd[\"y\"] == \"1\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "id": "practical-entrepreneur",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'0': 100, '1': 200}"
      ]
     },
     "execution_count": 97,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the label -> row-count mapping held in count_map.\n",
    "count_map"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "waiting-contest",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
